# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# HEADER ####
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # ##
#
#
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Please adjust this to point to the -> replication folder
#   -> uncomment the next line and insert path to replication folder
#
# setwd(base <- 'C:/Users/user/path/to/replication')
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
#
#
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# This file contains the R code needed to replicate results in:
#
#  Section 2 and Online Appendices A-C of
#
#  Mense, Andreas (2024). The Impact of New Housing Supply on the
#   Distribution of Rents.
#
# Author: Andreas Mense
# Version: October 16, 2024
#
#
# - - - - - - - - - - - - - - - - - - - -
# sessionInfo() below this line
# - - - - - - - - - - - - - - - - - - - -
#
# R version 4.3.2 (2023-10-31 ucrt)
# Platform: x86_64-w64-mingw32/x64 (64-bit)
# Running under: Windows 10 x64 (build 19045)
#
# Matrix products: default
#
#
# locale:
# [1] LC_COLLATE=German_Germany.utf8  LC_CTYPE=German_Germany.utf8    LC_MONETARY=German_Germany.utf8 LC_NUMERIC=C
# [5] LC_TIME=German_Germany.utf8
#
# time zone: Europe/Berlin
# tzcode source: internal
#
# attached base packages:
# [1] stats     graphics  grDevices utils     datasets  methods   base
#
# other attached packages:
#  [1] quantreg_5.98      SparseM_1.84-2     sf_1.0-14          readxl_1.4.3       plm_2.6-3          RColorBrewer_1.1-3 raster_3.6-26
#  [8] sp_2.1-3           lfe_2.9-0          Matrix_1.6-1.1
#
# loaded via a namespace (and not attached):
#  [1] sandwich_3.1-0     utf8_1.2.4         generics_0.1.3     class_7.3-22       KernSmooth_2.23-22 lattice_0.21-9     rematch_2.0.0
#  [8] digest_0.6.33      magrittr_2.0.3     grid_4.3.2         cellranger_1.1.0   e1071_1.7-14       DBI_1.1.3          Formula_1.2-5
# [15] survival_3.5-7     fansi_1.0.6        codetools_0.2-19   Rdpack_2.6         cli_3.6.2          rlang_1.1.2        units_0.8-5
# [22] rbibutils_2.2.16   miscTools_0.6-28   splines_4.3.2      tools_4.3.2        parallel_4.3.2     MatrixModels_0.5-3 dplyr_1.1.4
# [29] bdsmatrix_1.3-6    maxLik_1.5-2       R6_2.5.1           vctrs_0.6.5        zoo_1.8-12         proxy_0.4-27       lifecycle_1.0.4
# [36] classInt_0.4-10    MASS_7.3-60        pkgconfig_2.0.3    terra_1.7-71       pillar_1.9.0       glue_1.6.2         Rcpp_1.0.11
# [43] collapse_2.0.9     tidyselect_1.2.0   tibble_3.2.1       lmtest_0.9-40      rstudioapi_0.15.0  xtable_1.8-4       nlme_3.1-163
# [50] compiler_4.3.2
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #


source("code/inc/make.table.R") # user-written function to produce tables from regression output

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# . 1 Supply and weather data #####
# . 1.1 Municipality-level ####
#
# Load municipality-level data (new supply and weather shocks)
d <- readRDS("data/orig/buildings_weather_shocks_panel_municip.RDS")

# End-of-year completions
# NX2U1: Units completed in all buildings, December
# NX2U1_11: Units completed in all buildings, November
# NX2U1_10: Units completed in all buildings, October
# NX2U2: Units completed in MFH buildings, December
# NX2U2_11: Units completed in MFH buildings, November
# NX2U2_10: Units completed in MFH buildings, October
d$NX2U1_autumn <- d$NX2U1_10 + d$NX2U1_11 + d$NX2U1 # in all buildings
d$NX2U2_autumn <- d$NX2U2_10 + d$NX2U2_11 + d$NX2U2 # in MFH buildings

# Yearly total supply
d$NX2U1_tot <- apply(d[,c("NX2U1", sprintf("NX2U1_%02d",1:11))], 1, sum)
# Avg yearly supply
d_avg_supply <- aggregate(data.frame(NX2U1_tot_avg = d$NX2U1_tot), by=list(AGS = d$AGS), FUN=mean)
d <- merge(d, d_avg_supply, by="AGS")

# # #
# ~~~ TAB 2 ~~~####
require(lfe)
mod <- "%s ~ %s | %s | 0 | AGS"
sf <- list() ; m <- 0
this.var <- "I(NX2U1_autumn/NX2U1_tot_avg)" # all units
fe <- "AGS + Jahr"
# Nov + Dec, Year FE + Municipality FE, rainfall
inst <- "scale(rainfall_spells_summer)"
fit <- felm(as.formula(sprintf(mod,this.var,inst,fe)), data=d) #
sf[[m <- m + 1]] <- summary(fit, robust=T)
#
# Rainfall longest spell
inst <- "scale(rainfall_longest_spell)"
fit <- felm(as.formula(sprintf(mod,this.var,inst,fe)), data=d) #
sf[[m <- m + 1]] <- summary(fit, robust=T)
#
# Rainfall number of spells > 5 days
inst <- "scale(rainfall_t5_spells)"
fit <- felm(as.formula(sprintf(mod,this.var,inst,fe)), data=d) #
sf[[m <- m + 1]] <- summary(fit, robust=T)
#
# Frost Depth
inst <- "scale(frost_depth_feb)"
fit <- felm(as.formula(sprintf(mod,this.var,inst,fe)), data=d) #
sf[[m <- m + 1]] <- summary(fit, robust=T)
#
# Both instruments
inst <- "scale(rainfall_spells_summer) + scale(frost_depth_feb)"
fit <- felm(as.formula(sprintf(mod,this.var,inst,fe)), data=d) #
sf[[m <- m + 1]] <- summary(fit, robust=T)
#
# Only MFH buildings
inst <- "scale(rainfall_spells_summer) + scale(frost_depth_feb)"
this.var <- "I(NX2U2_autumn/NX2U1_tot_avg)"
fit <- felm(as.formula(sprintf(mod,this.var,inst,fe)), data=d) #
sf[[m <- m + 1]] <- summary(fit, robust=T)
#
# Correlation b/w rainfall and frost depth instruments
cor(d$frost_depth_feb, d$rainfall_spells_summer)
summary(lm(scale(frost_depth_feb) ~ scale(rainfall_spells_summer) - 1, data=d))
#
make.table(
  cf = sf,
  labels = rbind(
    c("scale(rainfall_spells_summer)","Avg. summer rainfall spell", "(deviation from local average)"),
    c("scale(rainfall_longest_spell)","Longest summer rainfall spell", "(deviation from local average)"),
    c("scale(rainfall_t5_spells)","\\# of rainfall spells 5+ days", "(deviation from local average)"),
    c("scale(frost_depth_feb)","Frost depth in February ", "(deviation from local average)")
  ),
  f = "tables/TAB_2.tex",
  add.line=T,
  digits = 4,
  align.stars = F,
  add.stats = list(
    c("yes","yes","yes","yes", "yes", "yes"),
    c("yes","yes","yes","yes", "yes", "yes"),
    unlist(lapply(sf, function(x) sprintf("%.1f",x$P.fstat["F"]))),
    unlist(lapply(sf, function(x) sprintf("%s", format(x$N,big.mark=","))))
  ),
  add.stats.names = c("Year FE","Municipality FE", "F statistic (proj. model)","Observations"), # "adj. R$^2$",
  significance.level = .1
)
# # #


# ~~~ FIG O-B1 ~~~ #####
# Maps of Weather Shocks #
require(raster)
require(RColorBrewer)
midbreaks <- seq(-8, 8, .5)
breaks <- c(-9, midbreaks, 14)
nlevels <- length(breaks)
cols <- colorRampPalette(brewer.pal(9,"Spectral"))(nlevels - 1)
for(year in 2010:2017) {
  r <- raster("data/orig/shapes/raster/base_raster.asc")
  r[! is.na(r)] <- 0
  for(month in 7:9) r <- r + raster(sprintf(
    "data/orig/shapes/raster/spell_above_20mm_shocks_%04d%02d.asc", year, month
  ))
  png(sprintf("figures/FIG_O-B1_weather_shock_%04d.png",year), width=5, height = 4.5, units="cm", res = 600)
  par(mar=c(.05,0,.4,0),bg=NA)
  plot(
    r,
    axes=F, legend=F, horizontal=T, legend.mar = 0,
    breaks = breaks, col = cols,
    box=F,
  )
  dev.off()
}
r0 <- r
r0[! is.na(r0)] <- NA
png(sprintf("figures/FIG_O-B1_weather_shock_legend.png",year), width=8, height = 4, units="cm", res = 600)
par(mar=c(3,.1,0,.1),bg=NA)
plot(r0, legend=F, box=F, axes=F, horizontal=T)
plot(
  r0,
  legend.only=T,
  legend.shrink = 1,
  legend.width = .5,
  legend.mar = 2,
  horizontal=T,
  breaks = breaks,
  lab.breaks = c(
    NA,
    min(midbreaks),
    rep(NA, which(midbreaks==0)-2),
    0,
    rep(NA, which(midbreaks== max(midbreaks)) - which(midbreaks==0) - 1),
    max(midbreaks),
    NA
  ),
  legend.args = list(text="", line=0, cex=.5),
  col = cols,
  box=F,
  axis.args = list(
    tck = -0.0065, cex.axis=.75, line=-1
  )
)
dev.off()


# ~~~ FIG O-B2 ~~~#####
# # Completions in January-December of the same year
months <- c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
#
sf <- list()
coefs <- coefs2 <- NULL
require(plm)
inst <- "scale(rainfall_spells_summer) + scale(frost_depth_feb)"
for(k in 1:12) {
  this.var <- sprintf("I(NX2U1%s/NX2U1_tot_avg)",ifelse(k<12,sprintf("_%02d",k),""))
  fit <- felm(as.formula(sprintf(mod,this.var,inst,fe)), data=d)
  sf[[k]] <- summary(fit, robust=T)
  coefs <- rbind(coefs,sf[[k]]$coefficients["scale(rainfall_spells_summer)",])
  coefs2 <- rbind(coefs2,sf[[k]]$coefficients["scale(frost_depth_feb)",])
}
#
coef <- coefs[,1]
se <- coefs[,2]
coef2 <- coefs2[,1]
se2 <- coefs2[,2]
xrange <- 1:12
offset <- .1
alpha <- .05
yrange <- range(
  c(coef + qnorm(alpha/2) * se, coef - qnorm(alpha/2) * se),
  c(coef2 + qnorm(alpha/2) * se2, coef2 - qnorm(alpha/2) * se2))
png("figures/FIG_O-B2.png", height=5.7, width=6.8, units = "cm", res=300)
par(mar=c(1.15,1.6,0.1,0.1), mgp=c(.95,0.2,0))
plot(NA, ylim=yrange, xlim = range(xrange), axes=F, ylab="Impact on residential building completions (coefficient)", xlab="", cex=.39, cex.lab=.5)
abline(h=0)
for(k in 1:length(xrange)) {
  segments(
    x0=xrange[k] - offset,
    y0=coef[k] + qnorm(alpha/2) * se[k],
    y1=coef[k] - qnorm(alpha/2) * se[k],
    col="black"
  )
  segments(
    x0=xrange[k] - .04 - offset,
    x1=xrange[k] + .04 - offset,
    y0=coef[k],
    col="black"
  )
  # Instrument 2
  segments(
    x0=xrange[k] + offset,
    y0=coef2[k] + qnorm(alpha/2) * se2[k],
    y1=coef2[k] - qnorm(alpha/2) * se2[k],
    col="grey50"
  )
  segments(
    x0=xrange[k] - .04 + offset,
    x1=xrange[k] + .04 + offset,
    y0=coef2[k],
    col="grey50"
  )
}
axis(1, at = c(-2, 14))
axis(
  1, at=xrange, labels = months[1:12],
  las=2, cex.axis=.5, cex.lab=.6,
  tck= -0.0065
)
axis(
  2, at = seq(-.16, .04, .01),
  las=1, cex.axis=.4, cex.lab=.45,
  tck= -0.0065
)
axis(2, at=c(-1,1))
legend("bottomleft",legend=c("Summer rainfall (95% CI)", "February frost depth (95% CI)"),
       col=c("black","grey50"), seg.len = 2, lwd=1, bty="n", cex=.5)
dev.off()


#
# ~~~ FIG 2 A-D ~~~  #####
#    How large is the aggregate impact of the delays?

# NX1U1: Residential buildings completed in December
# NX1U1_11: Residential buildings completed in November
# NX1U1_10: Residential buildings completed in October

d$NX1U1_autumn <- d$NX1U1 + d$NX1U1_11 + d$NX1U1_10
d.nextyear <- d[,c("AGS","Jahr", sprintf("NX1U1_%02d", 1:11), "NX1U1")]
names(d.nextyear)[ncol(d.nextyear)] <- "NX1U1_12"
d.nextyear$Jahr <- d.nextyear$Jahr - 1
d.nextyear <- merge(
  d.nextyear, d[,c("AGS","Jahr","NX1U1_autumn", "rainfall_spells_summer")],
  by=c("AGS","Jahr")
)
require(plm)
require(lfe)
d.nextyear <- pdata.frame(d.nextyear, index=c("AGS", "Jahr"))
summary(felm(
  NX1U1_autumn ~ rainfall_spells_summer | AGS + Jahr | 0 | AGS, data=d.nextyear
))
#
coefs <- coefs.k <-  data.frame(coef1 = numeric(12), se1 = numeric(12))
for(m in 1:12) {
  d.nextyear$depvar <- d.nextyear[,sprintf("NX1U1_%02d",1)]
  if(m > 1) for(k in 2:m) d.nextyear$depvar <- d.nextyear$depvar + d.nextyear[,sprintf("NX1U1_%02d",k)]
  mod <- (depvar ~ 1 | AGS + Jahr | (NX1U1_autumn ~ rainfall_spells_summer) | AGS)
  fit <- felm(mod, data=d.nextyear) #
  sf <- summary(fit, robust=T)
  coefs[m,1:2] <- sf$coefficients["`NX1U1_autumn(fit)`",1:2]
}
#
coef <- -coefs[,1]
se <- coefs[,2]
xrange <- 1:12
yrange <- c(-.72, 1.05)
alpha <- .1
png("figures/FIG_2D.png", height=4.5, width=6.5, units = "cm", res=300)
par(mar=c(1.8,1.8,0.1,0.1), mgp=c(.8,0.2,0))
plot(x=xrange, y=coefs[,1], type="b", col="white",
     ylim=yrange,
     axes=F, ylab="", xlab="", cex=.4, cex.lab=.5
)
abline(h=0)
abline(h=1, col="grey")
for(k in 1:length(xrange)) {
  segments(
    x0=xrange[k],
    y0= coef[k] + qnorm(alpha/2) * se[k],
    y1= coef[k] - qnorm(alpha/2) * se[k]
  )
  segments(
    x0=xrange[k] - .05,
    x1=xrange[k] + .05,
    y0= coef[k]
  )
}
axis(1, at = c(-2, 14))
axis(
  1, at=xrange, labels = months,
  las=2, cex.axis=.5, cex.lab=.6,
  tck= -0.0065
)
at.y <- seq(-.8, 1.2, .2)
axis(
  2, at = at.y ,
  labels = sprintf("%.1f", at.y),
  las=1, cex.axis=.4, cex.lab=.45,
  tck= -0.0065
)
mtext(side=2, text="Coefficient (cumulated impact)", cex=.5, line=1)
dev.off()
#
postscript("figures/FIG_2D.eps", height=4.5, width=5, onefile=T, horizontal=F)
par(mar=c(3,3.5,0.1,0.1), mgp=c(.8,0.3,0))
plot(x=xrange, y=coefs[,1], type="b", col="white",
     ylim=yrange,
     axes=F, ylab="", xlab="", cex=1, cex.lab=1
)
abline(h=0)
abline(h=1, col="grey")
for(k in 1:length(xrange)) {
  segments(
    x0=xrange[k],
    y0= coef[k] + qnorm(alpha/2) * se[k],
    y1= coef[k] - qnorm(alpha/2) * se[k]
  )
  segments(
    x0=xrange[k] - .05,
    x1=xrange[k] + .05,
    y0= coef[k]
  )
}
axis(1, at = c(-2, 14))
axis(
  1, at=xrange, labels = months,
  las=2, cex.axis=1, cex.lab=1,
  tck= -0.0065
)
at.y <- seq(-.8, 1.2, .2)
axis(
  2, at = at.y ,
  labels = sprintf("%.1f", at.y),
  las=1, cex.axis=1, cex.lab=1,
  tck= -0.0065
)
mtext(side=2, text="Coefficient (cumulated impact)", cex=1, line=2.5)
dev.off()
#

# Capacity constraints figures
# Fig 2A
require(readxl)
pd <- as.data.frame(read_xlsx(
  "data/orig/Zeitreihe_Fachkraeftemangel_Handwerk-Dezember.xlsx", sheet="plot_data",
  range="A1:H18"
))
png("figures/FIG_2A.png", height=4.5, width=6.5, units = "cm", res=300)
par(mar=c(1.8,1.8,0.1,0.1), mgp=c(.8,0.2,0))
plot(NA, ylim=c(6.1,13.5), xlim=c(2005,2019), axes=F, ylab="", xlab="")
lines(
  pd[pd$Jahr %in% 2005:2019,c("Jahr","delay_in_weeks")]
)
axis(1, at = c(2000, 2100))
axis(1, at = 2004:2019, labels=NULL, las=2, cex.axis=.55, hadj=1.4)
axis(
  2, at = 5:14 ,
  las=1, cex.axis=.55,
  tck= -0.0065
)
mtext(side=2, text="Waiting time in weeks", cex=.5, line=1)
dev.off()


postscript("figures/FIG_2A.eps", height=4.5, width=5, onefile=T, horizontal=F)
par(mar=c(3,3.5,0.1,0.1), mgp=c(.8,0.3,0))
plot(NA, ylim=c(6.1,13.5), xlim=c(2005,2019), axes=F, ylab="", xlab="")
lines(
  pd[pd$Jahr %in% 2005:2019,c("Jahr","delay_in_weeks")]
)
axis(1, at = c(2000, 2100))
axis(1, at = 2004:2019, labels=NULL, las=2, cex.axis=1, hadj=1.4)
axis(
  2, at = 5:14 ,
  las=1, cex.axis=1,
  tck= -0.0065
)
mtext(side=2, text="Waiting time in weeks", cex=1, line=2)
dev.off()

# Fig 2B
pd$index1 <- pd$unemp_pp_constr / pd$unemp_pp_constr[pd$Jahr == 2008]
pd$index2 <- pd$unemp_pp_inst / pd$unemp_pp_inst[pd$Jahr == 2008]
pd$index3 <- pd$unemp_rate / pd$unemp_rate[pd$Jahr == 2008]
png("figures/FIG_2B.png", height=4.5, width=6.5, units = "cm", res=300)
par(mar=c(1.8,1.8,0.1,0.1), mgp=c(.8,0.2,0))
plot(NA, axes=F, ylab="", xlab="",
     ylim=range(pd[pd$Jahr %in% 2008:2019,sprintf("index%d",1:3)]),
     xlim=c(2008,2019))
abline(h=0)
lines(
  pd[pd$Jahr %in% 2008:2019,c("Jahr","index1")], col="black"
)
lines(
  pd[pd$Jahr %in% 2008:2019,c("Jahr","index2")], lty=2, col="grey25"
)
points(pd[pd$Jahr %in% 2008:2019,c("Jahr","index2")], col="grey25", cex=.5)
lines(pd[pd$Jahr %in% 2008:2019,c("Jahr","index3")], lty=3, col="grey25", lwd=.5)
points(pd[pd$Jahr %in% 2008:2019,c("Jahr","index3")], col="grey25", cex=.5, pch=2)
axis(1, at = c(2000, 2100))
axis(1, at = 2004:2019, labels=NULL, las=2, cex.axis=.55, hadj=1.4)
axis(
  2, at = seq(0, 1.4, .2),
  las=1, cex.axis=.55,
  tck= -0.0065
)
mtext(side=2, text="Index (2008=1)", cex=.5, line=1)
text(x=2011, y=0.43, cex=.55, "Installations")
text(x=2015.9, y=0.60, cex=.55, "Construction")
text(x=2016.1, y=0.98, cex=.55, "Economy-wide\n unemployment rate")
dev.off()
#
postscript("figures/FIG_2B.eps", height=4.5, width=5, onefile=T, horizontal=F)
par(mar=c(3,3.5,0.1,0.1), mgp=c(.8,0.3,0))
plot(NA, axes=F, ylab="", xlab="",
     ylim=range(pd[pd$Jahr %in% 2008:2019,sprintf("index%d",1:3)]),
     xlim=c(2008,2019))
abline(h=0)
lines(
  pd[pd$Jahr %in% 2008:2019,c("Jahr","index1")], col="black"
)
lines(
  pd[pd$Jahr %in% 2008:2019,c("Jahr","index2")], lty=2, col="grey25"
)
points(pd[pd$Jahr %in% 2008:2019,c("Jahr","index2")], col="grey25", cex=1)
lines(pd[pd$Jahr %in% 2008:2019,c("Jahr","index3")], lty=3, col="grey25", lwd=0.5)
points(pd[pd$Jahr %in% 2008:2019,c("Jahr","index3")], col="grey25", cex=1, pch=2)
axis(1, at = c(2000, 2100))
axis(1, at = 2004:2019, labels=NULL, las=2, cex.axis=1, hadj=1.4)
axis(
  2, at = seq(0, 1.4, .2),
  las=1, cex.axis=1,
  tck= -0.0065
)
mtext(side=2, text="Index (2008=1)", cex=1, line=2)
text(x=2011, y=0.43, cex=1, "Installations")
text(x=2015.9, y=0.60, cex=1, "Construction")
text(x=2016.1, y=0.98, cex=1, "Economy-wide\n unemployment rate")
dev.off()
#
#
# Fig 2C
png("figures/FIG_2C.png", height=4.5, width=6.5, units = "cm", res=300)
par(mar=c(1.8,1.8,0.1,0.1), mgp=c(.8,0.2,0))
plot(NA, axes=F, ylab="", xlab="",
     ylim=range(pd[pd$Jahr %in% 2008:2019,"unemp_pp_inst"]),
     xlim=c(2008,2019))
abline(h=0)
lines(
  pd[pd$Jahr %in% 2008:2019,c("Jahr","unemp_pp_inst")]
)
axis(1, at = c(2000, 2100))
axis(1, at = 2004:2019, labels=NULL, las=2, cex.axis=.55, hadj=1.4)
axis(
  2, at = seq(0, 1.8, .2),
  las=1, cex.axis=.55,
  tck= -0.0065
)
text(x=2011, y=0.42, cex=.55, "Installations")
mtext(side=2, text="Skilled jobseekers per open position", cex=.5, line=1)
dev.off()
#
#
postscript("figures/FIG_2C.eps", height=4.5, width=5, onefile=T, horizontal=F)
par(mar=c(3,3.5,0.1,0.1), mgp=c(.8,0.3,0))
plot(NA, axes=F, ylab="", xlab="",
ylim=range(pd[pd$Jahr %in% 2008:2019,"unemp_pp_inst"]),
xlim=c(2008,2019))
abline(h=0)
lines(
  pd[pd$Jahr %in% 2008:2019,c("Jahr","unemp_pp_inst")]
)
axis(1, at = c(2000, 2100))
axis(1, at = 2004:2019, labels=NULL, las=2, cex.axis=1, hadj=1.4)
axis(
  2, at = seq(0, 1.8, .2),
  las=1, cex.axis=1,
  tck= -0.0065
)
text(x=2011, y=0.42, cex=1, "Installations")
mtext(side=2, text="Skilled jobseekers per open position", cex=1, line=2)
dev.off()
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# . 1.2 Aggregate to higher levels ####
#      - Create district-, AMR-, PR-level panels
sum.vars <- c("NX2U1",sprintf("NX2U1_%02d",1:11),"NX2U1_autumn","NX2U1_tot_avg","Einh")
mean.vars <- weather.instruments <- c(
  "rainfall_spells_summer","frost_depth_feb",
  "rainfall_t5_spells", "rainfall_longest_spell"
)  #


# Municipality shapefile for aggregation to PR + CZ
require(sf)
gmd <- read_sf(dsn = "data/orig/shapes/MUN/VG250_GEM.shp")
gmd$area <- gmd %>% st_area() %>% as.numeric()
gmd$AGS <- as.integer(gmd$AGS)
gmd <- gmd %>% st_make_valid()

# Aggregation to PRs
amr <- read_sf("data/orig/shapes/PR/PR250.shp")
amr <- amr %>% st_transform(crs = st_crs(gmd))
am <- NULL
for(i in 1:nrow(amr)) {
  if(i %% 10 == 0) cat(".")
  if(i == nrow(amr)) cat("\n")
  area <- amr[i,] %>% st_area() %>% as.numeric()
  intersects <- gmd %>% st_intersects(amr[i,]) %>% lengths() > 0
  tmp <- gmd[intersects,] %>% st_intersection(amr[i,])
  tmp$new_area <- tmp %>% st_area() %>% as.numeric()
  tmp$gem_area_share <- tmp$new_area / tmp$area
  tmp$area_share <- tmp$new_area / area
  tmp <- aggregate(
    tmp[,c("gem_area_share","area_share"),drop=T], by=list(AGS = tmp[,"AGS",drop=T]),
    FUN=function(x) min(sum(x),1)
  )
  tmp$area_share <- tmp$area_share / sum(tmp$area_share)

  tmp <- merge(d[d$AGS %in% tmp$AGS,], tmp[,c("AGS","area_share","gem_area_share")], by="AGS")

  sel.sum <- tmp$gem_area_share > .95
  if(any(sel.sum)) {
    districts <- paste(
      unique(floor(as.integer(tmp$AGS[sel.sum])*1e-3)),collapse=","
    )
    tmp.sum <- aggregate(
      tmp[sel.sum,sum.vars],
      by=list(Jahr = tmp$Jahr[sel.sum]),
      FUN = sum, na.rm=T
    )
    for(v in mean.vars) tmp[,v] <- tmp[,v] * tmp$area_share
    tmp.mean <- aggregate(
      tmp[,mean.vars],
      by=list(Jahr = tmp$Jahr),
      FUN = sum, na.rm=T
    )
    tmp <- merge(tmp.sum, tmp.mean, by=c("Jahr"))
    tmp$ROR <- amr[i,"ROR",drop=T]
    tmp$SN_ROR <- as.integer(amr[i,"SN_ROR",drop=T])
    tmp$districts <- districts

    am <- rbind(
      am, tmp
    )
  }

}


# Add PR-level control variables
am$Jahr <- am$Jahr + 1 # supply and weather variables are for t-1
am <- merge(
  am, readRDS("data/orig/control_variables_panel_pr.RDS"),
  by=c("SN_ROR", "Jahr")
)
# Save data for later use
saveRDS(am, "data/tmp/buildings_weather_shocks_panel_pr.RDS")


# Aggregation to AMR
amr <- read_sf("data/orig/shapes/CZ/AMR250.shp")
amr <- amr %>% st_transform(crs = st_crs(gmd))
am <- NULL
for(i in 1:nrow(amr)) {
  if(i %% 10 == 0) cat(".")
  if(i %% 100 == 0) cat(" ")
  if(i == nrow(amr)) cat("\n")

  area <- amr[i,] %>% st_area() %>% as.numeric()
  intersects <- gmd %>% st_intersects(amr[i,]) %>% lengths() > 0
  tmp <- gmd[intersects,] %>% st_intersection(amr[i,])
  tmp$new_area <- tmp %>% st_area() %>% as.numeric()
  tmp$gem_area_share <- tmp$new_area / tmp$area
  tmp$area_share <- tmp$area / area
  tmp <- aggregate(
    tmp[,c("gem_area_share","area_share"),drop=T], by=list(AGS = tmp[,"AGS",drop=T]),
    FUN=function(x) min(sum(x),1)
  )
  tmp$area_share <- tmp$area_share / sum(tmp$area_share)

  tmp <- merge(d[d$AGS %in% tmp$AGS,], tmp[,c("AGS","area_share","gem_area_share")], by="AGS")

  sel.sum <- tmp$gem_area_share > .95
  if(any(sel.sum)) {
    districts <- paste(
      unique(floor(as.integer(tmp$AGS[sel.sum])*1e-3)),collapse=","
    )
    tmp.sum <- aggregate(
      tmp[sel.sum,sum.vars],
      by=list(Jahr = tmp$Jahr[sel.sum]),
      FUN = sum, na.rm=T
    )
    for(v in mean.vars) tmp[,v] <- tmp[,v] * tmp$area_share
    tmp.mean <- aggregate(
      tmp[,mean.vars],
      by=list(Jahr = tmp$Jahr),
      FUN = sum, na.rm=T
    )
    tmp <- merge(tmp.sum, tmp.mean, by=c("Jahr"))
    tmp$AMR <- amr[i,"AMR",drop=T]
    tmp$SN_AMR <- as.integer(amr[i,"SN_AMR",drop=T])
    tmp$districts <- districts

    am <- rbind(
      am, tmp
    )
  }
}
am$Jahr <- am$Jahr + 1 # supply and weather variables are for t-1
saveRDS(am, "data/tmp/buildings_weather_shocks_panel_cz.RDS")


# Aggregation to District
d$Kreis <- floor(as.integer(d$AGS)*1e-3)
kreis <- aggregate(
  d[,sum.vars], by=list(Kreis = d$Kreis, Jahr = d$Jahr), sum
)
tmp <- aggregate(
  d[,mean.vars], by=list(Kreis = d$Kreis, Jahr = d$Jahr), mean
)
kreis <- merge(kreis, tmp, by=c("Kreis","Jahr"))
kreis$Jahr <- kreis$Jahr + 1 # supply and weather variables are for t-1
saveRDS(kreis, "data/tmp/buildings_weather_shocks_panel_districts.RDS")
#
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #



# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# . 2 Rents data ####
#
# . 2.1 Unit-level data set ######
y <- readRDS("data/orig/rent_data.RDS")
y$Baujahr_NA <- as.integer(is.na(y$Baujahr))
y$Baujahr[is.na(y$Baujahr)] <- 0
y$rent_psqm <- y$Kaltmiete / y$Flaeche
#
# Add PR-level variables
ror <- readRDS("data/tmp/buildings_weather_shocks_panel_pr.RDS")
#
y <- merge(
  y,
  ror[,c(
    "SN_ROR", "Jahr", # identifiers
    "NX2U1_tot_avg", "NX2U1_autumn", # endogenous supply variables
    "rainfall_spells_summer", # main instrument
    "employment_lag", "unemprate_lag", "students_per1000_lag", # PR-level controls
    "Einh" ## housing stock, 2011
  )],
  by=c("SN_ROR", "Jahr"), all.x=T
)
#
# Save for later use
saveRDS(y, "data/tmp/rent_data_estimation.RDS")
#
#
# . 2.2 Rent indices (panel) ####
#
# Create indices for avg. rents for
#  - districts,
#  - commuting zones
#  - planning regions
#
# Reload data
y <- readRDS("data/tmp/rent_data_estimation.RDS")
ror_panel <- readRDS("data/tmp/buildings_weather_shocks_panel_pr.RDS")
am_panel <- readRDS("data/tmp/buildings_weather_shocks_panel_cz.RDS")
district_panel <- readRDS("data/tmp/buildings_weather_shocks_panel_districts.RDS")


#
# Number of units offered on the market by PR (-> Table 5)
y$count <- 1
count <- aggregate(
  data.frame(flow = y$count), by=list(SN_ROR = y$SN_ROR, Jahr = y$Jahr), FUN=sum #
)
count_buildage <- aggregate(
  data.frame(flow = y$count[y$Baujahr_NA == 0]), by=list(
    SN_ROR = y$SN_ROR[y$Baujahr_NA == 0],
    Jahr = y$Jahr[y$Baujahr_NA == 0],
    new = as.integer(y$Alter[y$Baujahr_NA == 0] < 2)
  ), FUN=sum #
)
count_buildage <- count_buildage[count_buildage$new == 1,]
count <- merge(
  count, count_buildage[,c("SN_ROR", "Jahr", "flow")],
  by=c("SN_ROR", "Jahr"), suffixes=c("_all", "_new")
)
count$flow_existing <- count$flow_all - count$flow_new
rm(count_buildage)
ror_panel <- merge(ror_panel, count, by=c("SN_ROR", "Jahr"), all.x=T)
rm(count)

# Hedonic rent index at district level
y$Kreis <- as.integer(floor(y$AGS * 1e-3))
y$Jahr <- as.factor(y$Jahr)

require(lfe)
mod <- log(rent_psqm) ~ log(Flaeche) + poly(Baujahr,2) +
  Baujahr_NA + Fussbodenheizung + Parkett + Aufzug +
  EBK + GaesteWC + BalkonTerrasse + Gartenmit +
  Jahr | Typ + Ausstattung

kp <- NULL
for(kreis in unique(y$Kreis)) {
  if(kreis != 9361) {
    fit <- felm(mod, data=y[y$Kreis == kreis,])
    index <- coef(fit)
    index <- c(0, as.numeric(index[grepl("Jahr", names(index))][sprintf("Jahr%04d",2012:2018)]))
    kp <- rbind(
      data.frame(Kreis = kreis, Jahr = 2011:2018, Index = index),
      kp
    )
  }
}
# Mergew with other variables
kp <- merge(kp, district_panel, by=c("Kreis","Jahr"))
saveRDS(kp, file="data/tmp/panel_districts.RDS")


# Hedonic index at PR level
y <- y[! is.na(y$SN_ROR) & ! is.na(y$SN_AMR),]
ror <- NULL; i <- 0
for(sn_ror in unique(y$SN_ROR)) {
  if(sum(y$SN_ROR == sn_ror) > 100) {
    fit <- felm(mod, data=y[y$SN_ROR == sn_ror,])
    index <- coef(fit)
    index <- c(0, as.numeric(index[grepl("Jahr", names(index))][sprintf("Jahr%04d",2012:2018)]))
    ror <- rbind(
      data.frame(SN_ROR = sn_ror, Jahr = 2011:2018, Index = index),
      ror
    )
  }
  i <- i + 1
  cat("."); if(i %% 25 == 0) cat("\n")
}
cat("\n")
# merge with other variables
ror <- merge(ror, ror_panel, by=c("SN_ROR","Jahr"))

# Create population density variable at PR level
#  Use shapefile to compute area
require(sf)
ror_shp <- read_sf("data/orig/shapes/PR/PR250.shp")
ror_shp$SN_ROR <- as.integer(ror_shp$SN_ROR)
ror_shp$area <- ror_shp %>% st_area() %>% as.numeric() * 1e-6
ror_area <- aggregate(
  data.frame(ror_area_sqkm = ror_shp$area), by=list(SN_ROR=ror_shp$SN_ROR), FUN=sum
)
ror_area <- merge(ror_area, ror[ror$Jahr == 2011,c("SN_ROR", "population")], by="SN_ROR")
ror_area$pop_density <- ror_area$population / ror_area$ror_area_sqkm
ror_area$high_popdens <- ror_area$SN_ROR %in% ror_area$SN_ROR[
  ror_area$pop_density >= median(ror_area$pop_density)
]
#
ror <- merge(ror, ror_area[,c("SN_ROR", "pop_density", "high_popdens")], by="SN_ROR")
saveRDS(ror, file="data/tmp/panel_pr.RDS")


# Hedonic index at CZ level
amr <- NULL; i <- 0
for(sn_amr in unique(y$SN_AMR)) {
  if(sum(y$SN_AMR == sn_amr) > 100) {
    fit <- felm(mod, data=y[y$SN_AMR == sn_amr,])
    index <- coef(fit)
    index <- c(0, as.numeric(index[grepl("Jahr", names(index))][sprintf("Jahr%04d",2012:2018)]))
    amr <- rbind(
      data.frame(SN_AMR = sn_amr, Jahr = 2011:2018, Index = index),
      amr
    )
  }
  i <- i + 1
  cat("."); if(i %% 25 == 0) cat("\n")
}
cat("\n")
# Merge with other variables
amr <- merge(
  amr,
  readRDS("data/tmp/buildings_weather_shocks_panel_cz.RDS"),
  by=c("SN_AMR","Jahr")
)
saveRDS(amr, file="data/tmp/panel_cz.RDS")
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #



# . 2.3 Quality of new and existing housing  ######
#
# Impute prices of rental units
#  -> Estimate hedonic model based on price data
require(lfe)
mfh <- readRDS("data/orig/price_data_MFH.RDS") # raw data
#
mfh$Baujahr_NA <- as.integer(is.na(mfh$Baujahr))
mfh$Baujahr[mfh$Baujahr_NA==1] <- 0
mfh$Baujahr_sq <- mfh$Baujahr^2
mfh$new <- as.integer(mfh$Baujahr >= mfh$Jahr)
mfh$log_flaeche <- log(mfh$Flaeche)
mfh$log_price <- log(mfh$Kaufpreis)
#
mod <- log_price ~ log_flaeche + new + Baujahr + Baujahr_sq +
  Baujahr_NA + Fussbodenheizung + Parkett + Aufzug +
  EBK + GaesteWC + BalTer + Gartenmit | Jahr + Typ + Ausstattung + PLZ
#
fit <- felm(
  mod, data=mfh
)
summary(fit, robust=T)
model_vars <- all.vars(mod)
FEs <- getfe(fit)

# Predict prices of rental units using hedonic model
y <- readRDS("data/tmp/rent_data_estimation.RDS")
y <- y[y$Zimmer %in% seq(1, 7, .5),]
y$Baujahr_sq <- y$Baujahr^2
y$new <- as.integer(y$Baujahr >= y$Jahr)
y$log_flaeche <- log(y$Flaeche)
y$BalTer <- y$BalkonTerrasse
y <- y[
  y$PLZ %in% FEs[FEs$fe == "PLZ","idx"],
  c("SN_ROR",model_vars[model_vars %in% names(y)])
]
head(y[,names(coef(fit))])

y$log_price_hat <- as.matrix(y[,names(coef(fit))]) %*% as.vector(coef(fit)) +
  FEs$effect[FEs$fe == "Jahr"][match(y$Jahr,FEs$idx[FEs$fe == "Jahr"])] +
  FEs$effect[FEs$fe == "Typ"][match(y$Typ,FEs$idx[FEs$fe == "Typ"])] +
  FEs$effect[FEs$fe == "Ausstattung"][match(y$Ausstattung,FEs$idx[FEs$fe == "Ausstattung"])] +
  FEs$effect[FEs$fe == "PLZ"][match(y$PLZ,FEs$idx[FEs$fe == "PLZ"])]



# Compare price distributions
nearest <- function(d, xs) {
  eps <- max(diff(d$x))
  get_y <- (d$y[unlist(lapply(xs, function(x) which(abs(d$x - x) == sort(abs(d$x - x))[1])))] +
              d$y[unlist(lapply(xs, function(x) which(abs(d$x - x) == sort(abs(d$x - x))[2])))])/2
  drop_y <- unlist(lapply(xs, function(x) abs(d$x - x)[
    which(abs(d$x - x) == min(abs(d$x - x)))
  ]))
  get_y[drop_y > eps] <- NA
  get_y
}
#
sfh <- readRDS("data/orig/price_data_SFH.RDS")
sfh$Baujahr_NA <- as.integer(is.na(sfh$Baujahr))
sfh$Baujahr[sfh$Baujahr_NA==1] <- 0
sfh$Baujahr_sq <- sfh$Baujahr^2
sfh$new <- as.integer(sfh$Baujahr >= sfh$Jahr)
sfh$log_price <- log(sfh$Kaufpreis)
sfh$log_flaeche <- log(sfh$Flaeche)


# Residualize by PR and year
y <- y[! is.na(y$SN_ROR),]
cw_ror <- table(y[,c("PLZ","SN_ROR")])
col_id <- apply(cw_ror, 1, function(x) which(x == max(x))[1])
cw_ror <- data.frame(
  PLZ = as.integer(rownames(cw_ror)),
  SN_ROR = as.integer(colnames(cw_ror)[col_id])
)
mfh <- merge(mfh, cw_ror, by="PLZ")
sfh <- merge(sfh, cw_ror, by="PLZ")

# Append data
sfh$market <- "s"
mfh$market <- "m"
y$market <- "r"
y$log_price <- y$log_price_hat # copy to have common names in each data set
data <- rbind(
  sfh[,c("log_price", "market", "new", "SN_ROR", "Jahr")],
  mfh[,c("log_price", "market", "new", "SN_ROR", "Jahr")],
  y[,c("log_price", "market", "new", "SN_ROR", "Jahr")]
)

fit <- felm(
  log_price ~ 0 | SN_ROR + Jahr, data=data
)
stopifnot(nrow(data) == length(fit$residuals))
data$log_price_res <- fit$residuals # residualized (imputed) prices

# Compute densities
adj_fac <- 1
d_new <- density(
  data$log_price_res[data$new == 1 & data$market %in% c("s","m")],
  adj=adj_fac, n=2^12
)
d_new_mfh <- density(
  data$log_price_res[data$new == 1 & data$market %in% c("m")],
  adj=adj_fac, n=2^12
)
d_new_sfh <- density(
  data$log_price_res[data$new == 1 & data$market %in% c("s")],
  adj=adj_fac, n=2^12
)
d_exist_rent <- density(
  data$log_price_res[data$new == 0 & data$market %in% c("r")],
  adj=adj_fac, n=2^12
)

xs <- quantile(data$log_price_res[data$new == 0 & data$market %in% c("r")], seq(.005, .995,.005))
xs_new <- quantile(data$log_price_res[data$new == 1 & data$market %in% c("s", "m")], seq(.005, .995,.005))
max(xs)
xs_new[which(xs_new >= quantile(data$log_price_res[data$new == 0 & data$market %in% c("r")], .995))]

# ~~~ FIG 1 ~~~ ####
# Figure A: Price distributions
ylim <- c(0, max(d_new_mfh$y, d_new_sfh$y, d_exist_rent$y, d_new$y)*1.05)
xlim <- range(d_new_mfh$x, d_new_sfh$x, d_exist_rent$x, d_new$x)
png("figures/FIG_1A.png", height=6, width=7, units = "cm", res=600)
lwds <- 1.25
par(mar=c(1.8,1.4,0.1,0.1), mgp=c(.8,0.1,0))
plot(
  NA,
  ylim=ylim,
  xlim=xlim,
  lwd=lwds, axes=F, main="", ylab="", xlab=""#, outer=T
)
axis(
  2, at=c(-5,seq(0,1.5,.1)),
  las=2, labels=sprintf("%.1f",c(-5,seq(0,1.5,.1))),
  cex.axis=.45, tck= -0.007
)
axis(
  1, at=seq(-4,5,.5),
  cex.axis=.45, tck= -0.007
)
mtext("Density", 2, line=.8, cex=.55)
mtext("Residualized log price", 1, line=.8, cex=.55)
lines(d_new_mfh, col="grey50", lwd=lwds*2/3, lty=3)
lines(d_new_sfh, col="grey50", lwd=lwds*2/3, lty=4)
lines(d_new, col="grey50", lwd=lwds)
lines(d_exist_rent, col="black", lwd=lwds)
text(x=-1.05, y=.87, labels="Existing\nrental housing\n(imputed prices)", cex=.45, adj=c(.5, 0))
text(x=3.1, y=.06, labels="New units in multi-\nfamily housing", cex=.45, adj=c(.5, 0))
segments(x0=2.2, x1=1.9, y0=.09, y1=.05, lwd=.5)
text(x=1.9, y=1.008, labels="New units in single-\nfamily housing", cex=.45, adj=c(.5, 0))
segments(x0=.88, x1=.57, y0=1.07, y1=1.055, lwd=.5)
text(x=1.84, y=.9, labels="All new units", cex=.45)
segments(x0=1.15, x1=.63, y0=.9, y1=.93, lwd=.5)
abline(h=0, col="grey", lwd=0.5)
dev.off()

postscript("figures/FIG_1A.eps", height=4, width=5, onefile=T, horizontal=F)
par(mar=c(3,3,0.1,0.1), mgp=c(.8,0.3,0))
plot(
  NA,
  ylim=ylim,
  xlim=xlim,
  lwd=lwds, axes=F, main="", ylab="", xlab=""#, outer=T
)
axis(
  2, at=c(-5,seq(0,1.5,.1)),
  las=2, labels=sprintf("%.1f",c(-5,seq(0,1.5,.1))),
  cex.axis=1, tck= -0.007
)
axis(
  1, at=seq(-4,5,.5),
  cex.axis=1, tck= -0.007
)
mtext("Density", 2, line=1.8, cex=1)
mtext("Residualized log price", 1, line=1.8, cex=1)
lines(d_new_mfh, col="grey50", lwd=1, lty=4)
lines(d_new_sfh, col="grey50", lwd=1, lty=5)
lines(d_new, col="grey50", lwd=1)
lines(d_exist_rent, col="black", lwd=1)
text(x=-1.15, y=.87, labels="Existing\nrental housing\n(imputed prices)", cex=1, adj=c(.5, 0))
text(x=3.1, y=.06, labels="New units in multi-\nfamily housing", cex=1, adj=c(.5, 0))
segments(x0=2.1, x1=1.9, y0=.08, y1=.05, lwd=1)
text(x=1.95, y=1.035, labels="New units in single-\nfamily housing", cex=1, adj=c(.5, 0))
segments(x0=.88, x1=.57, y0=1.07, y1=1.055, lwd=1)
text(x=1.94, y=.9, labels="All new units", cex=1)
segments(x0=1.10, x1=.63, y0=.9, y1=.93, lwd=1)
abline(h=0, col="grey", lwd=0.5)
dev.off()


# Figure B: New supply sold by quality bin, relative to size of rental market
png("figures/FIG_1B.png", height=6, width=7, units = "cm", res=600)
share_new_supply <- (nearest(d_new, xs) * d_new$n) / (
  nearest(d_exist_rent, xs) * d_exist_rent$n + nearest(d_new, xs) * d_new$n
)
lwds <- 1.25
par(mar=c(1.8,1.4,0.1,0.1), mgp=c(.8,0.1,0))
plot(
  x = seq(.005, .995,.005),
  y = share_new_supply,
  col="black", cex=.15, pch=15,
  axes=F, xlab="", ylab="", ylim=c(0, .6)
)
axis(
  2, at=c(-5,seq(0,1.5,.1)),
  las=2, labels=sprintf("%.1f",c(-5,seq(0,1.5,.1))),
  cex.axis=.45, tck= -0.007
)
axis(
  1, at=seq(-4,5,.1),
  cex.axis=.45, tck= -0.007
)
mtext("Share new supply", 2, line=.8, cex=.55)
mtext("Quantile, imputed price of existing rental housing", 1, line=.8, cex=.55)
lines(
  x = seq(.005, .995,.005),
  y = share_new_supply,
  col="black"
)
segments(x0=0, x1=1, y0=0, lwd=.5, col="grey")
segments(x0=0, x1=.39, y0=.01, col="grey", lty=c(5))
segments(x0=.39, y0=.01, y1=-1, col="grey", lty=c(5))
segments(x0=0, x1=.77, y0=.1, col="grey", lty=c(5))
segments(x0=.77, y0=.1, y1=-1, col="grey", lty=c(5))
dev.off()


postscript("figures/FIG_1B.eps", height=4, width=5, onefile=T, horizontal=F)
par(mar=c(3,3,0.1,0.1), mgp=c(.8,0.3,0))
plot(
  x = seq(.005, .995,.005),
  y = share_new_supply,
  col="black", cex=.15, pch=15,
  axes=F, xlab="", ylab="", ylim=c(0, .6)
)
axis(
  2, at=c(-5,seq(0,1.5,.1)),
  las=2, labels=sprintf("%.1f",c(-5,seq(0,1.5,.1))),
  cex.axis=1, tck= -0.007
)
axis(
  1, at=seq(-4,5,.1),
  cex.axis=1, tck= -0.007
)
mtext("Share new supply", 2, line=1.8, cex=1)
mtext("Quantile, imputed price of existing rental housing", 1, line=1.8, cex=1)
lines(
  x = seq(.005, .995,.005),
  y = share_new_supply,
  col="black"
)
segments(x0=0, x1=1, y0=0, lwd=1, col="grey")
segments(x0=0, x1=.39, y0=.01, col="grey", lty=c(5))
segments(x0=.39, y0=.01, y1=-1, col="grey", lty=c(5))
segments(x0=0, x1=.77, y0=.1, col="grey", lty=c(5))
segments(x0=.77, y0=.1, y1=-1, col="grey", lty=c(5))
dev.off()


# ~~~ FIG O-A1 ~~~ ####
ror <- readRDS("data/tmp/panel_pr.RDS")
sel_high_hor <- ror$hor_2011 > median(ror$hor_2011[ror$Jahr == 2011])
high_hor_RORs <- data$SN_ROR %in% unique(ror$SN_ROR[sel_high_hor])
adj_fac <- 1
d_new_H <- density(
  data$log_price_res[data$new == 1 & data$market %in% c("s") & high_hor_RORs],
  adj=adj_fac, n=2^12
)
d_exist_rent_H <- density(
  data$log_price_res[data$new == 0 & data$market %in% c("r") & high_hor_RORs],
  adj=adj_fac, n=2^12
)
d_new_L <- density(
  data$log_price_res[data$new == 1 & data$market %in% c("s") & !high_hor_RORs],
  adj=adj_fac, n=2^12
)
d_exist_rent_L <- density(
  data$log_price_res[data$new == 0 & data$market %in% c("r") & !high_hor_RORs],
  adj=adj_fac, n=2^12
)
xs_H <- quantile(data$log_price_res[data$new == 0 & data$market %in% c("r") & high_hor_RORs], seq(.005, .995,.005))
xs_L <- quantile(data$log_price_res[data$new == 0 & data$market %in% c("r") & !high_hor_RORs], seq(.005, .995,.005))
#
ylim <- c(0, max(d_new_L$y, d_new_H$y, d_exist_rent_L$y, d_exist_rent_H$y)+.4)
xlim <- range(d_new_L$x, d_new_H$x, d_exist_rent_L$x, d_exist_rent_H$x)
png("figures/FIG_O-A1.png", height=6, width=7, units = "cm", res=600)
lwds <- 1.25
par(mar=c(1.8,1.4,0.1,0.1), mgp=c(.8,0.1,0))
plot(
  NA,
  ylim=ylim,
  xlim=xlim,
  lwd=lwds, axes=F, main="", ylab="", outer=T
)
axis(
  2, at=c(-5,seq(0,2,.2)),
  las=2, labels=sprintf("%.1f",c(-5,seq(0,2,.2))),
  cex.axis=.45, tck= -0.007
)

axis(
  1, at=seq(-4,5,.5),
  cex.axis=.45, tck= -0.007
)
mtext("Density", 2, line=.8, cex=.55)
mtext("Residualized log price", 1, line=.8, cex=.55)

lines(d_new_L, col="red", lwd=lwds, lty=2)
lines(d_exist_rent_L, col="red", lwd=lwds)
lines(d_new_H, col="grey20", lwd=lwds, lty=2)
lines(d_exist_rent_H, col="grey20", lwd=lwds)
legend(
  "topleft",
  legend=c(
    "Existing rental housing in PRs w/ below-median HOR",
    "New single-family housing in PRs w/ below-median HOR",
    "Existing rental housing in PRs w/ above-median HOR",
    "New single-family housing in PRs w/ above-median HOR"
  ),
  cex=.45, lty=c(1,2,1,2), lwd=lwds, col=c("red", "red", "grey20", "grey20"),
  bty="n", seg.len=3
)
abline(h=0, col="black", lwd=0.5)
dev.off()
#
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #




# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# . 3 Panel regression results ####

# Main analysis: At PR level
ror <- readRDS("data/tmp/panel_pr.RDS")
#
# Total supply in current year
ror_year_tot <- ror[,c("SN_ROR", "Jahr", c("NX2U1", sprintf("NX2U1_%02d", 1:9)))]
ror_year_tot$Jahr <- ror_year_tot$Jahr - 1
ror_year_tot$NX2U1_tot_12m <-  apply(ror_year_tot[,c("NX2U1", sprintf("NX2U1_%02d", 1:9))], 1, sum)
ror <- merge(ror, ror_year_tot[,c("SN_ROR", "Jahr", "NX2U1_tot_12m")],
             by=c("SN_ROR", "Jahr"), all.x=T)
ror$NX2U1_tot_12m <- ror$NX2U1_11 + ror$NX2U1_10 + ror$NX2U1 + ror$NX2U1_tot_12m
#
#
#
# . 3.1 Mean rents #####
# . 3.1.1 Baseline  ####
# ~~~ TAB 3 ~~~ ####
#   Main results
sf <- sf2 <- list() ; m <- 0; KPF <- Rsq <- character(3)

# (1) Nov+Dec completions as share of avg # newbuilds
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag)   | Jahr + SN_ROR | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- sprintf("%.1f",summary(fit$stage1)$iv1fstat["F"])
sf2[[m]] <- summary(fit$stage1, robust=T)
Rsq[m] <- sprintf("%.3f",sf[[m]]$adj.r.squared)


# (2) Nov+Dec completions as share of avg # newbuilds
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag) + log(unemprate_lag)  | Jahr + SN_ROR | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- sprintf("%.1f",summary(fit$stage1)$iv1fstat["F"])
sf2[[m]] <- summary(fit$stage1, robust=T)
Rsq[m] <- sprintf("%.3f",sf[[m]]$adj.r.squared)

# (3) Nov+Dec completions as share of avg # newbuilds + GDP controls
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~
      log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) |
      Jahr + SN_ROR | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- sprintf("%.1f",summary(fit$stage1)$iv1fstat["F"])
sf2[[m]] <- summary(fit$stage1, robust=T)
Rsq[m] <- sprintf("%.3f",sf[[m]]$adj.r.squared)

#
make.table(
  cf = sf,
  labels = rbind(
    c("`I(NX2U1_autumn/NX2U1_tot_avg)(fit)`","Units completed Oct--Dec in $t-1$", " per avg. \\# units completed annually"),
    c("log(employment_lag)","Log employment,", "year $t-1$"),
    c("log(unemprate_lag)","Log unemployment rate,", "year $t-1$"),
    c("students_per1000_lag", "U \\& college students", " per 1,000 inh., year $t-1$")
  ),
  f = "tables/TAB_3A.tex",
  add.line=T,
  digits = 3,
  align.stars = F,
  add.stats = list(
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    sprintf("%.1f",as.numeric(KPF)),
    unlist(lapply(sf, function(x) length(levels(x$fe$SN_ROR)))),
    unlist(lapply(sf, function(x) sprintf("%s", format(x$N,big.mark=","))))
  ),
  add.stats.names = c("Year FE","PR FE","Kleibergen-Paap F","Number of PRs","Observations"),
  significance.level = .1
)
make.table(
  cf = sf2,
  labels = rbind(
    c("rainfall_spells_summer", "Rainfall spell instrument", " (avg. length, Jul-Sep of year t-1)"),
    c("log(employment_lag)","Log employment,", "year $t-1$"),
    c("log(unemprate_lag)","Log unemployment rate,", "year $t-1$"),
    c("students_per1000_lag", "U \\& college students", " per 1,000 inh., year $t-1$")
  ),
  f = "tables/TAB_3B.tex",
  add.line=T,
  digits = 3,
  align.stars = F,
  add.stats = list(
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    Rsq,
    unlist(lapply(sf, function(x) length(levels(x$fe$SN_ROR)))),
    unlist(lapply(sf2, function(x) sprintf("%s", format(x$N,big.mark=","))))
  ),
  add.stats.names = c("Year FE","PR FE","Adj. R squared","Number of PRs","Observations"),
  significance.level = .1
)




# . 3.1.2 Robustness ####
#
# ~~~ TAB O-C1 ~~~ #####
#  Floods
sf <- list() ; m <- 0; KPF <- character(2)
#  Non-flood years
sel <- ! (
  (ror$Jahr == 2014 & floor(ror$SN_ROR*1e-2) %in% c(3, 6, 7, 8, 9, 14:16)) |
    (ror$Jahr == 2018 & floor(ror$SN_ROR*1e-2) %in% c(3, 15, 16))
)
ror$flood <- as.integer(!sel)
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) + flood | ROR + Jahr | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
#
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) | ROR + Jahr | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror[sel,]
  ),
  robust=T
)
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
#
make.table(
  cf = sf,
  labels = rbind(
    c("`I(NX2U1_autumn/NX2U1_tot_avg)(fit)`","Units completed Oct--Dec in $t-1$", " per avg. \\# of units completed annually"),
    c("flood", "Dummy: severe flood in federal state", "in year $t-1$")
  ),
  f = "tables/TAB_O-C1.tex",
  add.line=T,
  digits = 3,
  align.stars = F,
  add.stats = list(
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    sprintf("%.1f",as.numeric(KPF)),
    unlist(lapply(sf, function(x) length(levels(x$fe$SN_ROR)))),
    unlist(lapply(sf, function(x) sprintf("%s", format(x$N,big.mark=","))))
  ),
  add.stats.names = c("Year FE","PR FE","Other controls","Kleibergen-Paap F","Number of PRs","Observations"),
  significance.level = .1
)

# ~~~ TAB O-C2 ~~~ ####
#   additional controls
sf <- list() ; m <- 0; KPF <- character(3)
#
# Adding controls: share no degree
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) + share_no_degree_lag  |
      ROR + Jahr | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
#
# Adding controls: log household income
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) + log(hours_worked_lag) + log(gross_labinc_lag) |
      ROR + Jahr | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
#
# Adding controls: contemporaneous variables
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) +
      log(employment) + students_per1000 +
      log(unemprate)  |
      ROR + Jahr | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
#
make.table(
  cf = sf,
  labels = rbind(
    c("`I(NX2U1_autumn/NX2U1_tot_avg)(fit)`","Units completed Oct--Dec in $t-1$", " per avg. \\# of units completed annually"),
    c("log(employment_lag)","Log employment,", "year $t-1$"),
    c("students_per1000_lag", "U \\& college students", " per 1,000 inh., year $t-1$"),
    c("log(unemprate_lag)","Log unemployment rate,", "year $t-1$"),
    c("share_no_degree_lag", "Share w/o school degree", "(\\textit{Hauptschulabschluss})"),
    c("log(hours_worked_lag)","Log hours worked,", " year $t-1$"),
    c("log(gross_labinc_lag)","Log avg. gross labor income,", " year $t-1$"),
    c("log(employment)","Log employment,", "year $t$"),
    c("students_per1000", "U \\& college students", " per 1,000 inh., year $t$"),
    c("log(unemprate)","Log unemployment rate,", "year $t$")
  ),
  f = "tables/TAB_O-C2.tex",
  add.line=T,
  digits = 3,
  align.stars = F,
  add.stats = list(
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    sprintf("%.1f",as.numeric(KPF)),
    unlist(lapply(sf, function(x) length(levels(x$fe$SN_ROR)))),
    unlist(lapply(sf, function(x) sprintf("%s", format(x$N,big.mark=","))))
  ),
  add.stats.names = c("Year FE","PR FE","Kleibergen-Paap F","Number of PRs","Observations"),
  significance.level = .1
)

# ~~~ TAB O-C3 ~~~ ####
#     alternative instruments
sf <- list() ; m <- 0; KPF <- character(4)
#  (1) # longest rainfall spell
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) | SN_ROR + Jahr | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_longest_spell) | SN_ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
#  (2) # of spells > 4 days
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) | SN_ROR + Jahr | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_t5_spells) | SN_ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
#  (3) frost depth
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) | SN_ROR + Jahr | (I(NX2U1_autumn/NX2U1_tot_avg) ~ frost_depth_feb) | SN_ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
#  (4) Frost depth + rainfall instruments jointly
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) | SN_ROR + Jahr | (
        I(NX2U1_autumn/NX2U1_tot_avg) ~ frost_depth_feb + rainfall_spells_summer
      ) | SN_ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
#
make.table(
  cf = sf,
  labels = rbind(
    c("`I(NX2U1_autumn/NX2U1_tot_avg)(fit)`","Units completed Oct--Dec in $t-1$", " per avg. \\# of units completed annually")
  ),
  f = "tables/TAB_O-C3.tex",
  add.line=T,
  digits = 3,
  align.stars = F,
  add.stats = list(
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    sprintf("%.1f",as.numeric(KPF)),
    unlist(lapply(sf, function(x) length(levels(x$fe$SN_ROR)))),
    unlist(lapply(sf, function(x) sprintf("%s", format(x$N,big.mark=","))))
  ),
  add.stats.names = c("Year FE","PR FE","Other controls","Kleibergen-Paap F","Number of PRs","Observations"),
  significance.level = .1
)

# ~~~ TAB O-C4 ~~~ ####
#     log tot supply
ror$NX2U1_tot <- apply(ror[,c("NX2U1", sprintf("NX2U1_%02d", 1:11))], 1, sum)
# (1) log completions whole year
sf <- list() ; m <- 0; KPF <- character(2)
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~
      log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) |
      Jahr + SN_ROR | (log(NX2U1_tot) ~ rainfall_spells_summer) | ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- sprintf("%.1f",summary(fit$stage1)$iv1fstat["F"])
# (2) completions 12 months after rainfall shock
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~
      log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) |
      Jahr + SN_ROR | (I(NX2U1_tot_12m/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- sprintf("%.1f",summary(fit_stage1 <- fit$stage1)$iv1fstat["F"])
#
sf[[m <- m + 1]] <-  summary(fit_stage1)
#
make.table(
  cf = sf,
  labels = rbind(
    c("`log(NX2U1_tot)(fit)`","Log \\# of units completed in $t-1$", ""),
    c("`I(NX2U1_tot_12m/NX2U1_tot_avg)(fit)`","\\# of units completed within 12 months after", "rainfall shock, per avg. \\# of units completed annually"),
    c("rainfall_spells_summer", "Rainfall shock", "")
  ),
  f ="tables/TAB_O-C4.tex",
  add.line=T,
  digits = 3,
  align.stars = F,
  add.stats = list(
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    c(sprintf("%.1f",as.numeric(KPF)),"--"),
    unlist(lapply(sf, function(x) length(levels(x$fe$SN_ROR)))),
    unlist(lapply(sf, function(x) sprintf("%s", format(x$N,big.mark=","))))
  ),
  add.stats.names = c("Year FE","PR FE","Other controls","Kleibergen-Paap F","Number of PRs","Observations"),
  significance.level = .1
)


# ~~~ TAB O-C5 ~~~ ####
#  specs in changes
pror <- pdata.frame(ror, index=c("SN_ROR", "Jahr"))

sf <- list() ; m <- 0; KPF <- character(2)
sf[[m <- m+1]] <- summary(
  fit <- felm(
    diff(Index) ~ diff(log(employment_lag)) + diff(students_per1000_lag) +
      diff(log(unemprate_lag))  | Jahr + SN_ROR | (
        I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer
      ) | ROR,
    data=pror
  ),
  robust=T
)
KPF[m] <- sprintf("%.1f",summary(fit$stage1)$iv1fstat["F"])

# Compute measure of housing stock by year and PR
#  from stock in 2011 and add new supply in each year
out <- ror[ror$Jahr == 2011,c("SN_ROR", "Einh", sprintf("NX2U1_%02d",1:4))]
out$stock <- out$Einh - apply(out[,sprintf("NX2U1_%02d",1:4)], 1, sum) # census was conducted in May 2011
out$Jahr <- 2010
out$log_diff_stock <- NA
for(jahr in 2011:2018) {
  tmp <- ror[ror$Jahr == jahr,c("SN_ROR", "Jahr", "NX2U1_tot")]
  tmp <- merge(tmp, out[out$Jahr == jahr - 1,c("SN_ROR", "stock")], by=c("SN_ROR"))
  tmp$log_diff_stock <- log(tmp$stock + tmp$NX2U1_tot) - log(tmp$stock)
  tmp$stock <- tmp$stock + tmp$NX2U1_tot
  out <- rbind(
    out[,c("SN_ROR", "Jahr", "stock", "log_diff_stock")],
    tmp[,c("SN_ROR", "Jahr", "stock", "log_diff_stock")]
  )
}
ror <- merge(ror[,names(ror) != "log_diff_stock"],
             out[,c("SN_ROR", "Jahr", "log_diff_stock")], by=c("SN_ROR", "Jahr"))
pror <- pdata.frame(ror, index=c("SN_ROR", "Jahr"))
#
sf[[m <- m+1]] <- summary( # include as second column in Table O-C5
  fit <- felm(
    diff(Index) ~ diff(log(employment_lag)) +
      diff(students_per1000_lag) + diff(unemprate_lag)  |
      Jahr + SN_ROR | (log_diff_stock ~ rainfall_spells_summer) | ROR,
    data=pror
  ),
  robust=T
)
KPF[m] <- sprintf("%.1f",summary(fit$stage1)$iv1fstat["F"])
make.table(
  cf = sf,
  labels = rbind(
    c("`I(NX2U1_autumn/NX2U1_tot_avg)(fit)`","Units completed Oct--Dec in $t-1$", " per avg. \\# of units completed annually"),
    c("`log_diff_stock(fit)`","$\\Delta$ Log housing stock, year $t-1$", "")
  ),
  f = "tables/TAB_O-C5.tex",
  add.line=T,
  digits = 3,
  align.stars = F,
  add.stats = list(
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    sprintf("%.1f",as.numeric(KPF)),
    unlist(lapply(sf, function(x) length(levels(x$fe$SN_ROR)))),
    unlist(lapply(sf, function(x) sprintf("%s", format(x$N,big.mark=","))))
  ),
  add.stats.names = c("Year FE","PR FE","Other controls","Kleibergen-Paap F","Number of PRs","Observations"),
  significance.level = .1
)

# ~~~ TAB O-C6 ~~~ ####
#  different definitions of local housing markets
#
sf <- list() ; m <- 0; KPF <- numeric(2)
# (1) Commuting zone-level
#  (Arbeitsmarktregionen)
amr <- readRDS("data/tmp/panel_cz.RDS")
amr$SN_ROR1 <- amr$SN_ROR2  <- amr$SN_ROR3 <- 0
for(sn_amr in unique(amr$SN_AMR)) {
  districts <- unique(unlist(strsplit(amr$districts[amr$SN_AMR == sn_amr], ",")))
  sn_rors <- NULL
  for(district in districts)  sn_rors <- unique(c(sn_rors, ror$SN_ROR[grepl(district, ror$districts)]))
  if(length(sn_rors) == 3) {
    amr$SN_ROR1[amr$SN_AMR == sn_amr] <- sn_rors[1]
    amr$SN_ROR2[amr$SN_AMR == sn_amr] <- sn_rors[2]
    amr$SN_ROR3[amr$SN_AMR == sn_amr] <- sn_rors[3]
  } else if(length(sn_rors) == 2) {
    amr$SN_ROR1[amr$SN_AMR == sn_amr] <- sn_rors[1]
    amr$SN_ROR2[amr$SN_AMR == sn_amr] <- sn_rors[2]
  } else if(length(sn_rors) == 1) {
    amr$SN_ROR1[amr$SN_AMR == sn_amr] <- sn_rors
  }
}
amr <- merge(
  amr, ror[,c("SN_ROR", "Jahr", "employment_lag", "unemprate_lag", "students_per1000_lag")],
  suffixes=c("", ""), by.x=c("SN_ROR1", "Jahr"), by.y=c("SN_ROR", "Jahr")
)
amr <- merge(
  amr, ror[,c("SN_ROR", "Jahr", "employment_lag", "unemprate_lag", "students_per1000_lag")],
  suffixes=c("", "2"), by.x=c("SN_ROR2", "Jahr"), by.y=c("SN_ROR", "Jahr"), all.x=T
)
amr <- merge(
  amr, ror[,c("SN_ROR", "Jahr", "employment_lag", "unemprate_lag", "students_per1000_lag")],
  suffixes=c("", "3"), by.x=c("SN_ROR3", "Jahr"), by.y=c("SN_ROR", "Jahr"), all.x=T
)
amr$employment_lag <- apply(amr[,sprintf("employment_lag%s", c("", "2", "3"))], 1, mean, na.rm=T)
amr$unemprate_lag <- apply(amr[,sprintf("unemprate_lag%s", c("", "2", "3"))], 1, mean, na.rm=T)
amr$students_per1000_lag <- apply(amr[,sprintf("students_per1000_lag%s", c("", "2", "3"))], 1, mean, na.rm=T)

amr$NX2U1_autumn <- amr$NX2U1_10 + amr$NX2U1_11 + amr$NX2U1

#
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) | SN_AMR + Jahr | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | SN_AMR,
    data=amr
  ),
  robust=T
)
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]


# (2) Districts (Kreise/kreisfreie Staedte)
kp <- readRDS("data/tmp/panel_districts.RDS")
kp$SN_ROR <- kp$SN_ROR2  <- kp$SN_ROR3 <- 0
for(district in unique(kp$Kreis)) {
  sn_ror <- unique(ror$SN_ROR[grepl(district, ror$districts)])
  kp$SN_ROR[kp$Kreis == district] <- sn_ror
}
kp <- merge(
  kp, ror[,c("SN_ROR", "Jahr", "employment_lag", "unemprate_lag", "students_per1000_lag")],
  suffixes=c("", ""), by=c("SN_ROR", "Jahr")
)


sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) | Kreis + Jahr | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | Kreis,
    data=kp
  ),
  robust=T
)
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]

#
make.table(
  cf = sf,
  labels = rbind(
    c("`I(NX2U1_autumn/NX2U1_tot_avg)(fit)`","Units completed Oct--Dec in $t-1$", " per avg. \\# of units completed annually")
  ),
  f = "tables/TAB_O-C6.tex",
  add.line=T,
  digits = 3,
  align.stars = F,
  add.stats = list(
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    c(c("commuting zone","district")),
    c(length(table(amr$SN_AMR)), length(table(kp$Kreis))),
    sprintf("%.1f",as.numeric(KPF)),
    unlist(lapply(sf, function(x) sprintf("%s", format(x$N,big.mark=","))))
  ),
  add.stats.names = c("Year FE","Location FE","Other controls","Spatial unit","Number of spatial units","Kleibergen-Paap F","Observations"),
  significance.level = .1
)

# ~~~ TAB O-C7 ~~~#####
#
sf <- sf2 <- list() ; m <- 0; KPF <- Rsq <- character(2)
# (1) High population density
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~
      log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) |
      Jahr + SN_ROR | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror[ror$high_popdens,]
  ),
  robust=T
)
KPF[m] <- sprintf("%.1f",summary(fit$stage1)$iv1fstat["F"])
sf2[[m]] <- summary(fit$stage1, robust=T)
Rsq[m] <- sprintf("%.3f",sf[[m]]$adj.r.squared)

# (2) Low population density
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~
      log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) |
      Jahr + SN_ROR | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror[!ror$high_popdens,]
  ),
  robust=T
)
KPF[m] <- sprintf("%.1f",summary(fit$stage1)$iv1fstat["F"])
sf2[[m]] <- summary(fit$stage1, robust=T)
Rsq[m] <- sprintf("%.3f",sf[[m]]$adj.r.squared)

make.table(
  cf = sf,
  labels = rbind(
    c("`I(NX2U1_autumn/NX2U1_tot_avg)(fit)`","Units completed Oct--Dec in $t-1$", " per avg. \\# of units completed annually")
  ),
  f = "tables/TAB_O-C7.tex",
  add.line=T,
  digits = 3,
  align.stars = F,
  add.stats = list(
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    sprintf("%.1f",as.numeric(KPF)),
    unlist(lapply(sf, function(x) length(levels(x$fe$SN_ROR)))),
    unlist(lapply(sf, function(x) sprintf("%s", format(x$N,big.mark=","))))
  ),
  add.stats.names = c("Year FE","PR FE","Other controls","Kleibergen-Paap F","Number of PRs","Observations"),
  significance.level = .1
)

# . 3.1.3 IV balance ####
# ~~~ FIG O-C1 ~~~ ####
plot_cf <- function(x0, x, i=1, alpha=.05, cex=.5, lwd=1, nvars=10) {
  library("RColorBrewer")
  cols <- brewer.pal(n = 8, name = "RdBu")
  segments(
    y0=nvars+1 - x0,
    x0=coef(x)[i,1] + qnorm(alpha/2) * coef(x)[i,2],
    x1=coef(x)[i,1] - qnorm(alpha/2) * coef(x)[i,2],
    lwd=lwd, col=ifelse(x0 < 3, cols[7], cols[2])
  )
  segments(
    y0=nvars+1 - x0 - .1,
    y1=nvars+1 - x0 + .1,
    x0=coef(x)[i,1],
    lwd=1.5*lwd, col=ifelse(x0 < 3, cols[8], cols[1])
  )
}

png("figures/FIG_O-C1.png", height=5.5, width=10, units = "cm", res=300)
par(mar=c(2.5,8.5,0.1,0.1), mgp=c(.95,0.2,0))
plot(
  NA, ylim=c(.8, 11.2), xlim=c(-.085, .06), axes=F, ylab="", xlab=""
)
segments(y0=0, y1=10.2, x0=0)
axis(2, at=10:1,
     labels = c(
       "Hedonic rent index, year after rainfall shock",
       "Oct-Dec completions, year of rainfall shock",
       "Hedonic rent index, year of rainfall shock",
       "Jan-Jun completions, year of rainfall shock",
       "Log employment, year of rainfall shock",
       "Log unemp. rate, year of rainfall shock",
       "Students per capita, year of rainfall shock",
       "Log GDP per capita, year of rainfall shock",
       "Log gross labor income, year of rainfall shock",
       "Log household income, year of rainfall shock"
     ),
     cex.axis=.45, tck= -0.0065, las=2)
axis(2, at=11,
     labels = c(
       "Outcome (standardized)"
     ),
     cex.axis=.5, tck= 0, las=2, hadj=1.2, col="white")
axis(2, at=c(-10, 10.2), labels=c("",""), tck= 0)
axis(
  1, at = seq(-.12, .8, .02),
  labels = sprintf("%.2f",seq(-.12, .8, .02)),
  las=1, cex.axis=.45,
  tck= -0.0065
)
mtext(side=1, text="Coefficient of rainfall instrument", cex=.5, line=1.3)
#
m <- 0

# (1) Rent index (reduced form)
sf <- summary(
  fit <- felm(
    scale(Index) ~ rainfall_spells_summer | SN_ROR + Jahr | 0 | SN_ROR,
    data=ror
  ),
  robust=T
)
plot_cf(x0=m <- m + 1, x=sf)
# (2) Completions Nov + Dec (first stage)
sf <- summary(
  fit <- felm(
    scale(NX2U1_autumn / NX2U1_tot_avg) ~ rainfall_spells_summer | SN_ROR + Jahr | 0 | SN_ROR,
    data=ror
  ),
  robust=T
)
plot_cf(x0=m <- m + 1, x=sf)
#
# (3) Lagged rent index
pror <- pdata.frame(ror, index=c("SN_ROR", "Jahr"))
ror$L_Index <- lag(pror$Index)
sf <- summary(
  fit <- felm(
    scale(L_Index) ~ rainfall_spells_summer | SN_ROR + Jahr | 0 | SN_ROR,
    data=ror[! is.na(ror$L_Index),]
  ),
  robust=T
)
plot_cf(x0=m <- m + 1, x=sf)
#
# (4) Completions first half of year
ror$completions_1st_half <- apply(ror[,sprintf("NX2U1_%02d", 1:6)], 1, sum)
sf <- summary(
  fit <- felm(
    scale(I(completions_1st_half / NX2U1_tot_avg)) ~ rainfall_spells_summer | SN_ROR + Jahr | 0 | SN_ROR,
    data=ror
  ),
  robust=T
)
plot_cf(x0=m <- m + 1, x=sf)
#
# (5) Lagged log employment
sf <- summary(
  fit <- felm(
    scale(log(employment_lag)) ~ rainfall_spells_summer | SN_ROR + Jahr | 0 | SN_ROR,
    data=ror
  ),
  robust=T
)
plot_cf(x0=m <- m + 1, x=sf)
#
# (6) Lagged log unemployment rate
sf <- summary(
  fit <- felm(
    scale(log(unemprate_lag)) ~ rainfall_spells_summer | SN_ROR + Jahr | 0 | SN_ROR,
    data=ror
  ),
  robust=T
)
plot_cf(x0=m <- m + 1, x=sf)
#
# (7) Lagged students per 1000 inh
sf <- summary(
  fit <- felm(
    scale(students_per1000_lag) ~ rainfall_spells_summer | SN_ROR + Jahr | 0 | SN_ROR,
    data=ror
  ),
  robust=T
)
plot_cf(x0=m <- m + 1, x=sf)
#
# (8) Log gdp
sf <- summary(
  fit <- felm(
    scale(log(gdp_per_capita_lag)) ~ rainfall_spells_summer | SN_ROR + Jahr | 0 | SN_ROR,
    data=ror
  ),
  robust=T
)
plot_cf(x0=m <- m + 1, x=sf)
#
# (9) Log gross labor income
sf <- summary(
  fit <- felm(
    scale(log(gross_labinc_lag)) ~ rainfall_spells_summer | SN_ROR + Jahr | 0 | SN_ROR,
    data=ror
  ),
  robust=T
)
plot_cf(x0=m <- m + 1, x=sf)
#
# (10) Log household income
sf <- summary(
  fit <- felm(
    scale(log(household_income_lag)) ~ rainfall_spells_summer | SN_ROR + Jahr | 0 | SN_ROR,
    data=ror
  ),
  robust=T
)
plot_cf(x0=m <- m + 1, x=sf)
#
abline(h=c(11 - 2.5), col="grey", lty=3)
##
dev.off()



# . 3.1.4 High-demand markets ####
# ~~~ TAB 4 ~~~ ####
ror_diff <- merge(ror[ror$Jahr == 2018,], ror[ror$Jahr == 2011,], by="SN_ROR", suffixes=c("2018", "2011"))

# Demand variables
ror_diff$longD_log_labinc <- log(ror_diff$gross_labinc2018) - log(ror_diff$gross_labinc2011) #
ror_diff$longD_log_hhinc <- log(ror_diff$household_income2018) - log(ror_diff$household_income2011) #
ror_diff$longD_log_employment <- log(ror_diff$employment2018) - log(ror_diff$employment2011) #
#
ror <- merge(
  ror, ror_diff[,c("SN_ROR", "longD_log_labinc", "longD_log_hhinc","longD_log_employment")
  ], by="SN_ROR"
)
#
sel.hd_labinc <- ror$longD_log_labinc > median(ror$longD_log_labinc[ror$Jahr == 2011]) #
sel.hd_hhinc <- ror$longD_log_hhinc > median(ror$longD_log_hhinc[ror$Jahr == 2011]) #
sel.hd_emp <- ror$longD_log_employment > median(ror$longD_log_employment[ror$Jahr == 2011]) #

#
sf <- list() ; m <- 0; KPF <- character(3)
# (1) Increasing log employment
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) | SN_ROR + Jahr | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | SN_ROR,
    data=ror[sel.hd_emp,]
  ),
  robust=T
)
KPF[m] <- fit$stage1$iv1fstat[[1]]["F"]

# (2) Increasing log gross labor income
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) | SN_ROR + Jahr | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | SN_ROR,
    data=ror[sel.hd_labinc,]
  ),
  robust=T
)
KPF[m] <- fit$stage1$iv1fstat[[1]]["F"]

# (3) Increasing log household income
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) | SN_ROR + Jahr | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | SN_ROR,
    data=ror[sel.hd_hhinc,]
  ),
  robust=T
)
KPF[m] <- fit$stage1$iv1fstat[[1]]["F"]
#
make.table(
  cf = sf,
  labels = rbind(
    c("`I(NX2U1_autumn/NX2U1_tot_avg)(fit)`","Units completed Oct--Dec in $t-1$", " per avg. \\# of units completed annually")
  ),
  f = "tables/TAB_4.tex",
  add.line=T,
  digits = 3,
  align.stars = F,
  add.stats = list(
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    sprintf("%.1f",as.numeric(KPF)),
    unlist(lapply(sf, function(x) length(levels(x$fe$SN_ROR)))),
    unlist(lapply(sf, function(x) sprintf("%s", format(x$N,big.mark=","))))
  ),
  add.stats.names = c("Year FE","PR FE","Other controls","Kleibergen-Paap F","Number of PRs","Observations"),
  significance.level = .1
)

# . 3.2 Quantities #####
#  ~~~ TAB 5 ~~~ ####
#       Effect on quantity traded
sf <- list() ; m <- 0; KPF <- character(6)
# (1) Existing units, mod 1
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    I(flow_existing/NX2U1_tot_avg) ~  log(employment_lag) #+ students_per1000_lag + log(unemprate_lag)
    | Jahr + SN_ROR | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- fit$stage1$iv1fstat[[1]]["F"]
# (2) Existing units, mod 2
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    I(flow_existing/NX2U1_tot_avg) ~  log(employment_lag) + students_per1000_lag #+ log(unemprate_lag)
    | Jahr + SN_ROR | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- fit$stage1$iv1fstat[[1]]["F"]
# (3) Existing units, mod 3
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    I(flow_existing/NX2U1_tot_avg) ~  log(employment_lag) + students_per1000_lag + log(unemprate_lag)
    | Jahr + SN_ROR | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- fit$stage1$iv1fstat[[1]]["F"]
# (4) New units, mod 1
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    I(flow_new/NX2U1_tot_avg) ~  log(employment_lag) #+ students_per1000_lag + log(unemprate_lag)
    | Jahr + SN_ROR | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- fit$stage1$iv1fstat[[1]]["F"]
# (5) New units, mod 2
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    I(flow_new/NX2U1_tot_avg) ~  log(employment_lag) + students_per1000_lag #+ log(unemprate_lag)
    | Jahr + SN_ROR | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- fit$stage1$iv1fstat[[1]]["F"]
# (6) New units, mod 3
sf[[m <- m + 1]] <- summary(
  fit <- felm(
    I(flow_new/NX2U1_tot_avg) ~  log(employment_lag) + students_per1000_lag + log(unemprate_lag)
    | Jahr + SN_ROR | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | ROR,
    data=ror
  ),
  robust=T
)
KPF[m] <- fit$stage1$iv1fstat[[1]]["F"]
#
make.table(
  cf = sf,
  labels = rbind(
    c("`I(NX2U1_autumn/NX2U1_tot_avg)(fit)`","Units completed Oct--Dec in $t-1$", " per avg. \\# of units completed annually"),
    c("log(employment_lag)","Log employment,", "year $t-1$"),
    c("students_per1000_lag", "U \\& college students", " per 1,000 inh., year $t-1$"),
    c("log(unemprate_lag)","Log unemployment rate,", "year $t-1$")
  ),
  f = "tables/TAB_5.tex",
  add.line=T,
  digits = 3,
  align.stars = F,
  add.stats = list(
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    sprintf("%.1f",as.numeric(KPF)),
    unlist(lapply(sf, function(x) length(unique(x$fe[2][[1]])))),
    unlist(lapply(sf, function(x) sprintf("%s", format(x$N,big.mark=","))))
  ),
  add.stats.names = c("Year FE","PR FE", "Kleibergen-Paap F","Number of PRs","Observations"),
  significance.level = .1
)









# . 3.3 Rent quantiles #####

# Compute hedonic indices for deciles 1, ..., 9
require(lfe)
y <- readRDS("data/tmp/rent_data_estimation.RDS")
#
y <- y[y$Zimmer %in% seq(1, 7, .5),]
#
y$rent_psqm <- y$Kaltmiete / y$Flaeche
y$Typ <- as.factor(y$Typ)
y$Jahr <- as.factor(y$Jahr)
y$Ausstattung <- as.factor(y$Ausstattung)
y$log_rent_psqm <- log(y$rent_psqm)
y$log_rent <- log(y$Kaltmiete)
y$log_flaeche <- log(y$Flaeche)
y$Alter <- as.integer(as.character(y$Jahr)) - y$Baujahr
#
y$units_pct <- y$NX2U1_autumn / y$NX2U1_tot_avg
#
# Compute log hedonic index for each decile, by PR
require(lfe)
require(quantreg)
mod <- log_rent_psqm ~ log_flaeche + Baujahr +
  Baujahr_NA + Aufzug +
  EBK + GaesteWC + BalkonTerrasse + Jahr
taus <- seq(.1,.9,.1)
#
out <- NULL
y <- y[! is.na(y$SN_ROR),]
all.units  <- unique(y$SN_ROR)
for(sn_ror in all.units) { # by PR
  if(sum(y$SN_ROR == sn_ror) > 100) {
    #
    cat(sn_ror, "\n")
    tmp.data <- y[y$SN_ROR == sn_ror,]
    #
    # Quantile regression estimates, q = 1, .., 9
    index <- NULL
    for(k in 1:9) {
      fit <- rq(mod, tau=taus[k], data=tmp.data, method="pfn")
      index <- cbind(index, coef(fit))
    }
    index <- rbind(0, index[grepl("Jahr", rownames(index)),][sprintf("Jahr%04d",2012:2018),])
    colnames(index) <- sprintf("Index%d", 1:9); rownames(index) <- NULL
    #
    # Append
    out <- rbind(
      data.frame(SN_ROR = sn_ror, Jahr = 2011:2018, index),
      out
    )
    #
  }
}

# Compute mean rent/sqm and deciles 1, ..., 9 of rent/sqm variable
ror_rents <- aggregate(
  data.frame(rent_psqm = y$rent_psqm), by=list(SN_ROR = y$SN_ROR, Jahr = y$Jahr), FUN=mean
)
for(k in 1:9) {
  tmp <- aggregate(
    data.frame(rent_psqm = y$rent_psqm), by=list(SN_ROR = y$SN_ROR, Jahr = y$Jahr), FUN=function(x) quantile(x, k/10)
  )
  names(tmp)[3] <- sprintf("rent_psqm_q%d0", k)
  ror_rents <- merge(ror_rents, tmp, by=c("SN_ROR", "Jahr"))
}
ror <- merge(out, ror_rents, by=c("SN_ROR","Jahr"))

ror_panel <- readRDS("data/tmp/panel_pr.RDS")
ror <- merge(ror, ror_panel, by=c("SN_ROR","Jahr"))

# ~~~ TAB 1 ~~~ #####
#    Summary stats PR panel
fn <- "tables/TAB_1%s.tex"

vpi <- data.frame(
  Jahr = 2010:2018,
  vpi = c(93.2, 95.2, 97.1, 98.5, 99.5, 100, 100.5, 102.0, 103.8) / 100
)
vpi$vpi <- vpi$vpi / vpi[vpi$Jahr == 2018,"vpi"]
ror <- merge(ror, vpi, by="Jahr")
for(outvar in c("Index", sprintf("Index%d", c(1, 3, 5,7,9)))) {
  ror[,outvar] <- ror[,outvar] - log(ror[,"vpi"])
  for(sn_ror in unique(ror$SN_ROR)) ror[ror$SN_ROR == sn_ror,outvar] <-
      ror[ror$SN_ROR == sn_ror,outvar] - ror[ror$SN_ROR == sn_ror & ror$Jahr == 2011,outvar]
}
ror <- merge(ror, ror[ror$Jahr == 2011,c("SN_ROR","rent_psqm")], by=c("SN_ROR"), suffixes=c("", "_real"))
ror$rent_psqm_real <- ror$rent_psqm_real * exp(ror$Index)

ror$log_new_supply <- log(ror$NX2U1_tot)
ror$new_supply_share <- ror$NX2U1_autumn / ror$NX2U1_tot_avg
sel <- ! (
  (ror$Jahr == 2014 & floor(ror$SN_ROR*1e-2) %in% c(3, 6, 7, 8, 9, 14:16)) |
    (ror$Jahr == 2018 & floor(ror$SN_ROR*1e-2) %in% c(3, 15, 16))
)
ror$flood <- as.integer(!sel)
ror$employment_lag_1000 <- ror$employment_lag / 1000
ror$share_no_degree_lag <- ror$share_no_degree_lag / 100
ror$unemprate_lag <- ror$unemprate_lag / 100

numvars <- list(
  A = c(
    "rent_psqm_real", # outcomes
    "Index",
    sprintf("Index%d", c(1, 3, 5,7,9))
  ),
  B = c(
    "new_supply_share", "log_new_supply", # endogenous vars
    "rainfall_spells_summer","frost_depth_feb", # instruments
    "rainfall_longest_spell", "rainfall_t5_spells" # alternative instruments
  ),
  C = c(
    "employment_lag_1000", "unemprate_lag", "students_per1000_lag",
    "share_no_degree_lag", "hours_worked_lag", "gross_labinc_lag",
    "flood"
  )
)


numlabels <- list(
  A = c(
    "Real monthly rent/sqm (index-based, 2018 EUR)",
    "Log mean real rent index ($2011=0$)",  "Log real rent index 1st decile ($2011=0$)",
    "Log real rent index 3rd decile ($2011=0$)",  sprintf("Log real rent index %dth decile ($2011=0$)", c(5,7,9))
  ),
  B = c(
    "New supply in Oct--Dec per yearly avg. \\# of newbuilds", "Log new supply (whole year)",
    "Average summer rainfall spell (deviation)", "Feb. frost depth (deviation)",
    "Longest rainfall spell (deviation)", "Number of spells w/ 5+ days (deviation)"
  ),
  C = c(
    "Employment (1,000's)", "Unemployment rate",
    "U \\& college students per 1,000 residents",
    "Share w/o school degree", "Hours worked per worker in year", "Gross average labor income",
    "Dummy: Heavy flood in federal state"
  )
)
for(k in 1:3) {
  digits1 <- ifelse(k < 3,3,1)
  digits2 <- 3
  cat("Variable & Min & Mean & Q25 & Median & Q75 & Max", "\\\\ \n",
      file=sprintf(fn, names(numvars)[k]), append=F) #
  for(i in 1:length(numvars[[k]])) {
    digits <- ifelse(i %in% c(1, 5, 6),digits1,digits2)
    thisvar <- numvars[[k]][i]
    thislabel <- numlabels[[k]][i]
    stats <- round(c(
      min(ror[,thisvar], na.rm=T),
      mean(ror[,thisvar], na.rm=T),
      quantile(ror[,thisvar], c(.25,.5,.75), na.rm=T),
      max(ror[,thisvar], na.rm=T)
    ),digits)
    string <- format(stats, big.mark=",", digits = digits)
    line <- paste0(c(paste("\\;\\;",thislabel), sprintf("%s", string)), collapse=" & ")
    cat(line, "\\\\ \n", file=sprintf(fn, names(numvars)[k]), append=T) #
  }
}
#
#
# ~~~ FIG 3 ~~~ ####
#      Results: Quantile indices
get_ci <- function(x, alpha=.05) c(x[1], x[1] + qnorm(alpha/2) * x[2], x[1] + qnorm(1-alpha/2) * x[2])
#
# FIG 3A: Estimate baseline model for each decile
mod <- paste0(
  "Index%d ~ log(employment_lag) + students_per1000_lag ",
  "+ log(unemprate_lag) | SN_ROR + Jahr | ",
  "(I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | SN_ROR"
)
sf <- list() ; m <- 0; KPF <- numeric(9)
plot_cf <- matrix(NA, nrow=9, ncol=3)
for(k in 1:9) {
  fmod <- formula(sprintf(mod, k))
  sf[[k]] <- summary(fit <- felm(fmod, data=ror),robust=T)
  KPF[k] <- summary(fit$stage1)$iv1fstat["F"]
  plot_cf[k,] <- get_ci(coef(sf[[k]])[4,])
}
# Graphical output
xrange <- 1:9
yrange <- c(-.6, 0.05)
png("figures/FIG_3A.png", height=7, width=6.7, units = "cm", res=300)
par(mar=c(1.8,1.8,0.1,0.1), mgp=c(.8,0.2,0))
plot(NA,
     ylim=yrange, xlim=range(xrange),
     axes=F, ylab="", xlab="", cex=.4, cex.lab=.5
)
abline(h=0)
segments(
  x0=xrange,
  y0= plot_cf[,2],
  y1= plot_cf[,3]
)
segments(
  x0=xrange - .05,
  x1=xrange + .05,
  y0= plot_cf[,1]
)
axis(1, at = c(-2, 14))
axis(
  1, at=xrange, labels = sprintf("q%d", xrange*10),
  las=1, cex.axis=.5, cex.lab=.6,
  tck= -0.0065
)
at.y <- seq(-.7, 0.1, .1)
axis(
  2, at = at.y ,
  labels = sprintf("%.2f", at.y),
  las=1, cex.axis=.4, cex.lab=.45,
  tck= -0.0065
)
abline(h=-0.191687, col="grey50", lwd=.5)
mtext(side=2, text="Coefficient", cex=.55, line=1)
dev.off()


postscript("figures/FIG_3A.eps", height=4, width=5, onefile=T, horizontal=F)
par(mar=c(2,3.5,0.1,0.1), mgp=c(.8,0.3,0))
plot(NA,
     ylim=yrange, xlim=range(xrange),
     axes=F, ylab="", xlab="", cex=1, cex.lab=1
)
abline(h=0)
segments(
  x0=xrange,
  y0= plot_cf[,2],
  y1= plot_cf[,3]
)
segments(
  x0=xrange - .05,
  x1=xrange + .05,
  y0= plot_cf[,1]
)
axis(1, at = c(-2, 14))
axis(
  1, at=xrange, labels = sprintf("q%d", xrange*10),
  las=1, cex.axis=1, cex.lab=1,
  tck= -0.0065
)
at.y <- seq(-.7, 0.1, .1)
axis(
  2, at = at.y ,
  labels = sprintf("%.1f", at.y),
  las=1, cex.axis=1, cex.lab=1,
  tck= -0.0065
)
abline(h=-0.191687, col="grey50", lwd=1)
mtext(side=2, text="Coefficient", cex=1, line=2.5)
dev.off()


# FIG 3B: High/low local homeownership rates
# (1) High HOR PRs
sel_high_hor <- ror$hor_2011 > median(ror$hor_2011[ror$Jahr == 2011])
summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) | SN_ROR + Jahr | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | SN_ROR,
    data=ror[sel_high_hor,]
  ),
  robust=T
)
mean_effect_H <- coef(fit)["`I(NX2U1_autumn/NX2U1_tot_avg)(fit)`"]
fit$stage1$iv1fstat[[1]]["F"]

# (2) Low HOR PRs
summary(
  fit <- felm(
    Index ~ log(employment_lag) + students_per1000_lag +
      log(unemprate_lag) | SN_ROR + Jahr | (I(NX2U1_autumn/NX2U1_tot_avg) ~ rainfall_spells_summer) | SN_ROR,
    data=ror[!sel_high_hor,]
  ),
  robust=T
)
mean_effect_L <- coef(fit)["`I(NX2U1_autumn/NX2U1_tot_avg)(fit)`"]
fit$stage1$iv1fstat[[1]]["F"]
#
#
sf <- list() ; m <- 0; KPF <- numeric(9)
plot_cf <- matrix(NA, nrow=9, ncol=3)
for(k in 1:9) {
  fmod <- formula(sprintf(mod, k))
  sf[[k]] <- summary(fit <- felm(fmod, data=ror[sel_high_hor,]),robust=T)
  KPF[k] <- summary(fit$stage1)$iv1fstat["F"]
  plot_cf[k,] <- get_ci(coef(sf[[k]])[4,], alpha=.1)
}
sf <- list() ; m <- 0; KPF <- numeric(9)
plot_cf2 <- matrix(NA, nrow=9, ncol=3)
for(k in 1:9) {
  fmod <- formula(sprintf(mod, k))
  sf[[k]] <- summary(fit <- felm(fmod, data=ror[!sel_high_hor,]),robust=T)
  KPF[k] <- summary(fit$stage1)$iv1fstat["F"]
  plot_cf2[k,] <- get_ci(coef(sf[[k]])[4,], alpha=.1)
}

png("figures/FIG_3B.png", height=7, width=6.7, units = "cm", res=300)
par(mar=c(1.8,1.8,0.1,0.1), mgp=c(.8,0.2,0))
plot(NA,
     ylim=yrange, xlim=range(xrange),
     axes=F, ylab="", xlab="", cex=.4, cex.lab=.5
)
abline(h=mean_effect_L, col="grey50", lwd=.5, lty=3)
abline(h=mean_effect_H, col="black", lwd=.5, lty=2)
abline(h=0)
segments(
  x0=xrange+.1,
  y0= plot_cf[,2],
  y1= plot_cf[,3], col="black"
)
segments(
  x0=xrange+.1 - .05,
  x1=xrange+.1 + .05,
  y0= plot_cf[,1] , col="black"
)
segments(
  x0=xrange-.1,
  y0= plot_cf2[,2],
  y1= plot_cf2[,3], col="grey50"
)
segments(
  x0=xrange-.1 - .05,
  x1=xrange-.1 + .05,
  y0= plot_cf2[,1] , col="grey50"
)
axis(1, at = c(-2, 14))
axis(
  1, at=xrange, labels = sprintf("q%d", xrange*10),
  las=1, cex.axis=.5, cex.lab=.6,
  tck= -0.0065
)
at.y <- seq(-.7, 0.1, .1)
axis(
  2, at = at.y ,
  labels = sprintf("%.2f", at.y),
  las=1, cex.axis=.4, cex.lab=.45,
  tck= -0.0065
)
mtext(side=2, text="Coefficient", cex=.55, line=1)
legend(
  "bottomleft",
  legend=c(
    "Below-median homeownership rate",
    "Above-median homeownership rate"
  ),
  cex=.55, bty="n", pch=3, col=c("grey50", "black")#
)
dev.off()
#

postscript("figures/FIG_3B.eps", height=4, width=5, onefile=T, horizontal=F)
par(mar=c(2,3.5,0.1,0.1), mgp=c(.8,0.3,0))
plot(NA,
     ylim=yrange, xlim=range(xrange),
     axes=F, ylab="", xlab="", cex=.4, cex.lab=1
)
abline(h=mean_effect_L, col="grey50", lwd=1, lty=4)
abline(h=mean_effect_H, col="black", lwd=1, lty=2)
abline(h=0)
segments(
  x0=xrange+.1,
  y0= plot_cf[,2],
  y1= plot_cf[,3], col="black"
)
segments(
  x0=xrange+.1 - .05,
  x1=xrange+.1 + .05,
  y0= plot_cf[,1] , col="black"
)
segments(
  x0=xrange-.1,
  y0= plot_cf2[,2],
  y1= plot_cf2[,3], col="grey50"
)
segments(
  x0=xrange-.1 - .05,
  x1=xrange-.1 + .05,
  y0= plot_cf2[,1] , col="grey50"
)
axis(1, at = c(-2, 14))
axis(
  1, at=xrange, labels = sprintf("q%d", xrange*10),
  las=1, cex.axis=1, cex.lab=1,
  tck= -0.0065
)
at.y <- seq(-.7, 0.1, .1)
axis(
  2, at = at.y ,
  labels = sprintf("%.1f", at.y),
  las=1, cex.axis=1, cex.lab=1,
  tck= -0.0065
)
mtext(side=2, text="Coefficient", cex=1, line=2.5)
legend(
  "bottomleft",
  legend=c(
    "Below-median homeownership rate",
    "Above-median homeownership rate"
  ),
  cex=1, bty="n", pch=3, col=c("grey50", "black")#, lty=c(3,2), seg.len=4
)
dev.off()
#


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #






# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# . 4 Unit-level results #####
#     Summary stats and regression results
require(lfe)
y <- readRDS("data/tmp/rent_data_estimation.RDS")
y$Typ <- as.factor(y$Typ) # dwelling type
y$Jahr <- as.factor(y$Jahr) # year of observation
y$Ausstattung <- as.factor(y$Ausstattung) # quality indicator
y <- y[y$Zimmer %in% seq(1, 7, .5) & ! is.na(y$NX2U1_tot_avg),]
y$Alter <- as.integer(as.character(y$Jahr)) - y$Baujahr # Building vintage

# . 4.1 Summary stats
# ~~~ TAB O-A1 ~~~ ####
y$Baujahr_sumstat <- y$Baujahr # year of construction w/ NA (for sum stat table)
y$Baujahr_sumstat[y$Baujahr_sumstat == 0] <- NA
#
fn <- "tables/TAB_O-A1_%s.tex"
numvars <- c(
  "rent_psqm","Flaeche","Baujahr_sumstat",
  "Fussbodenheizung","Parkett","Aufzug",
  "EBK","GaesteWC","Gartenmit","BalkonTerrasse"
)
numlabels <- c(
  "Monthly rent per sqm","Living area in sqm","Year of construction",
  "Floor heating", "Parquet flooring", "Elevator",
  "Fitted kitchen", "Second bathroom", "Garden", "Balcony or terrace"
)
str0 <- "%.0f"
str1 <- "%.1f"
str2 <- "%.2f"
str3 <- "%.3f"
str4 <- "%.4f"
for(i in 1:length(numvars)) {
  string <- ifelse(i<3,str1,str2)
  string <- ifelse(i==3,str0,string)

  stats <- c(
    min(y[,numvars[i]], na.rm=T),
    mean(y[,numvars[i]], na.rm=T),
    quantile(y[,numvars[i]], c(.25,.5,.75), na.rm=T),
    max(y[,numvars[i]], na.rm=T)
  )
  line <- paste0(c(paste("\\;",numlabels[i]), sprintf(string, stats)), collapse=" & ")
  cat(line, "\\\\ \n", file=sprintf(fn, "A"), append=i>1)
}
#
string <- "%d"
shares <- c(0:8)
line <- paste0(c("", sprintf(string, shares)), collapse=" & ")
cat(line, "\\\\ \n \\hline \\\\[-1.8ex]\n", file=sprintf(fn, "B"), append=F)

string <- "%.3f"
shares <- table(y[,"Typ"])[as.character(c(1:8,0))] / nrow(y)
line <- paste0(c("\\; Dwelling type", sprintf(string, shares)), collapse=" & ")
cat(line, "\\\\ \n", file=sprintf(fn, "B"), append=T)
#
string <- "%.3f"
shares <- table(y[,"Ausstattung"]) / nrow(y)
line <- paste0(c("\\; Quality", sprintf(string, shares)), collapse=" & ")
cat(line, "\\\\ \n", file=sprintf(fn, "B"), append=T)
#
string <- "%d"
cat(line, "\\\\ \n \\hline \\\\[-1.8ex]\n", file=sprintf(fn, "C"), append=F)
line <- paste0(c("\\; Observations", format(nrow(y), big.mark=",")), collapse=" & ")
cat(line, "\\\\ \n", file=sprintf(fn, "C"), append=T)


# . 4.2 Regressions ####
ror <- readRDS("data/tmp/panel_pr.RDS")
#y <- merge(y, ror[ror$Jahr == 2011,c("SN_ROR", "Einh")], by="SN_ROR", suffixes=c("", "_ror"))

y$weight <- 1/y$Einh
y$weight <- y$weight / sum(y$weight) * nrow(y)
y$endogenous_var <- y$NX2U1_autumn / y$NX2U1_tot_avg

base.ctr <- "log(Flaeche) + Fussbodenheizung + Parkett + Aufzug +
                EBK + GaesteWC + BalkonTerrasse + Gartenmit +
                Ausstattung + log(employment_lag) + students_per1000_lag +
                log(unemprate_lag)"
base.mod <- "log(rent_psqm) ~ %s | Jahr + SN_ROR + Baujahr + Typ | (endogenous_var ~ rainfall_spells_summer) | SN_ROR"
mod <- as.formula(sprintf(base.mod, base.ctr))
#
# #
# ~~~ TAB O-C8 ~~~ ####
# Building age class
sf <- list(); KPF <- numeric(7); m <- 0
sel.age <- y$Baujahr_NA == 0
# 1 - All units - unweighted
fit <- felm(mod, data=y) #
print(sf[[m <- m+1]] <- summary(fit, robust=T))
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
# 2 - All units
fit <- felm(mod, data=y, weights = y$weight) #
print(sf[[m <- m+1]] <- summary(fit, robust=T))
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
# 3 - All units w/ building age info
fit <- felm(mod, data=y[sel.age,] , weights = y$weight[sel.age]) #
print(sf[[m <- m+1]] <- summary(fit, robust=T))
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
# 4 - newbuilds
fit <- felm(mod, data=y[sel.age & y$Alter %in% 0,], weights = y$weight[sel.age & y$Alter %in% 0]) #
print(sf[[m <- m+1]] <- summary(fit, robust=T))
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
# 5- 1-20 years old
fit <- felm(mod, data=y[sel.age & y$Alter %in% 1:10,], weights = y$weight[sel.age & y$Alter %in% 1:10]) #
print(sf[[m <- m+1]] <- summary(fit, robust=T))
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
# 6 - 21-60
fit <- felm(mod, data=y[sel.age & y$Alter %in% 11:50,], weights = y$weight[sel.age & y$Alter %in% 11:50]) #
print(sf[[m <- m+1]] <- summary(fit, robust=T))
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
# 7 - 61+ years
fit <- felm(mod, data=y[sel.age & y$Alter > 50,], weights = y$weight[sel.age & y$Alter > 50]) #
print(sf[[m <- m+1]] <- summary(fit, robust=T))
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
#
make.table(
  cf = sf,
  labels = rbind(
    c("`endogenous_var(fit)`","Units completed Oct--Dec in $t-1$", " per avg. \\# units completed annually")
  ),
  f = "tables/TAB_O-C8.tex",
  add.line=T,
  digits = 3,
  align.stars = F,
  add.stats = list(
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    sprintf("%.1f",as.numeric(KPF)),
    unlist(lapply(sf, function(x) length(levels(x$fe$SN_ROR)))),
    unlist(lapply(sf, function(x) sprintf("%s", format(x$N,big.mark=","))))
  ),
  add.stats.names = c("Year FE","PR FE","Other controls","Kleibergen-Paap F","Number of PRs","Observations"),
  significance.level = .1
) #

# ~~~ TAB O-C9 ~~~ ####
# Size of unit
sf <- list(); KPF <- numeric(4); m <- 0
# 4 - newbuilds
fit <- felm(mod, data=y[y$Zimmer %in% seq(1,1.5,.5),], weights = y$weight[y$Zimmer %in% seq(1,1.5,.5)]) #
print(sf[[m <- m+1]] <- summary(fit, robust=T))
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
# 5- 1-20 years old
fit <- felm(mod, data=y[y$Zimmer %in% seq(2,2.5,.5),], weights = y$weight[y$Zimmer %in% seq(2,2.5,.5)]) #
print(sf[[m <- m+1]] <- summary(fit, robust=T))
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
# 6 - 21-60
fit <- felm(mod, data=y[y$Zimmer %in% seq(3,3.5,.5),], weights = y$weight[y$Zimmer %in% seq(3,3.5,.5)]) #
print(sf[[m <- m+1]] <- summary(fit, robust=T))
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
# 7 - 61+ years
fit <- felm(mod, data=y[y$Zimmer %in% seq(4,7,.5),], weights = y$weight[y$Zimmer %in% seq(4,7,.5)]) #
print(sf[[m <- m+1]] <- summary(fit, robust=T))
KPF[m] <- summary(fit$stage1)$iv1fstat["F"]
#
make.table(
  cf = sf,
  labels = rbind(
    c("`endogenous_var(fit)`","Units completed Oct--Dec in $t-1$", " per avg. \\# units completed annually")
  ),
  f = "tables/TAB_O-C9.tex",
  add.line=T,
  digits = 3,
  align.stars = F,
  add.stats = list(
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    rep("yes",length(sf)),
    sprintf("%.1f",as.numeric(KPF)),
    unlist(lapply(sf, function(x) length(levels(x$fe$SN_ROR)))),
    unlist(lapply(sf, function(x) sprintf("%s", format(x$N,big.mark=","))))
  ),
  add.stats.names = c("Year FE","PR FE","Other controls","Kleibergen-Paap F","Number of PRs","Observations"),
  significance.level = .1
)
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
